@inproceedings{setiawan-2024-accurate,
  title     = {Accurate Knowledge Distillation via n-best Reranking},
  author    = {Setiawan, Hendra},
  editor    = {Duh, Kevin and
               Gomez, Helena and
               Bethard, Steven},
  booktitle = {Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)},
  month     = jun,
  year      = {2024},
  address   = {Mexico City, Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.naacl-long.72/},
  doi       = {10.18653/v1/2024.naacl-long.72},
  pages     = {1330--1345},
  abstract  = {We propose utilizing n-best reranking to enhance Sequence-Level Knowledge Distillation (Kim and Rush, 2016) where we extract pseudo-labels for student model's training data from top n-best hypotheses and leverage a diverse set of models with different inductive biases, objective functions or architectures, including some publicly-available large language models, to pick the highest-quality hypotheses as labels. The effectiveness of our proposal is validated through experiments on the WMT'21 German {\ensuremath{\leftrightarrow}} English and Chinese {\ensuremath{\leftrightarrow}} English translation tasks. Our results demonstrate that utilizing pseudo-labels generated by our n-best reranker leads to a significantly more accurate student model. In fact, our best student model achieves comparable accuracy to a large translation model from (Tran et al., 2021) with 4.7 billion parameters, while having two orders of magnitude fewer parameters.},
}
Markdown (Informal)
[Accurate Knowledge Distillation via n-best Reranking](https://preview.aclanthology.org/jlcl-multiple-ingestion/2024.naacl-long.72/) (Setiawan, NAACL 2024)
ACL
- Hendra Setiawan. 2024. Accurate Knowledge Distillation via n-best Reranking. In Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), pages 1330–1345, Mexico City, Mexico. Association for Computational Linguistics.