@inproceedings{jin-etal-2022-plug,
title = "Plug and Play Knowledge Distillation for k{NN}-{LM} with External Logits",
author = "Jin, Xuyang and
Ge, Tao and
Wei, Furu",
editor = "He, Yulan and
Ji, Heng and
Li, Sujian and
Liu, Yang and
    Chang, Chia-Hui",
booktitle = "Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)",
month = nov,
year = "2022",
address = "Online only",
publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2022.aacl-short.57/",
doi = "10.18653/v1/2022.aacl-short.57",
pages = "463--469",
abstract = "Despite the promising evaluation results by knowledge distillation (KD) in natural language understanding (NLU) and sequence-to-sequence (seq2seq) tasks, KD for causal language modeling (LM) remains a challenge. In this paper, we present a novel perspective of knowledge distillation by proposing plug and play knowledge distillation (PP-KD) to improve a (student) kNN-LM that is the state-of-the-art in causal language modeling by leveraging external logits from either a powerful or a heterogeneous (teacher) LM. Unlike conventional logit-based KD where the teacher{'}s knowledge is built-in during training, PP-KD is plug and play: it stores the teacher{'}s knowledge (i.e., logits) externally and uses the teacher{'}s logits of the retrieved k-nearest neighbors during kNN-LM inference at test time. In contrast to marginal perplexity improvement by logit-based KD in conventional neural (causal) LM, PP-KD achieves a significant improvement, enhancing the kNN-LMs in multiple language modeling datasets, showing a novel and promising perspective for causal LM distillation."
}
Markdown (Informal)
[Plug and Play Knowledge Distillation for kNN-LM with External Logits](https://aclanthology.org/2022.aacl-short.57/) (Jin et al., AACL-IJCNLP 2022)
ACL
- Xuyang Jin, Tao Ge, and Furu Wei. 2022. Plug and Play Knowledge Distillation for kNN-LM with External Logits. In Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 2: Short Papers), pages 463–469, Online only. Association for Computational Linguistics.
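The abstract only sketches the mechanism, so below is a minimal illustrative sketch (not the authors' implementation) of how externally stored teacher logits could be folded into one kNN-LM decoding step. The function name `pp_kd_knn_lm_step`, the 0.5 mixing weight between a neighbor's target token and its stored teacher distribution, and all hyperparameters are assumptions for illustration only.

```python
import numpy as np

def softmax(x, axis=-1):
    x = x - x.max(axis=axis, keepdims=True)
    e = np.exp(x)
    return e / e.sum(axis=axis, keepdims=True)

def pp_kd_knn_lm_step(lm_logits, neighbor_dists, neighbor_targets,
                      neighbor_teacher_logits, vocab_size,
                      lam=0.25, temperature=1.0):
    """One decoding step of a kNN-LM whose retrieved neighbors also carry
    externally stored teacher logits (a sketch of the 'plug and play' idea)."""
    # Base (student) LM distribution over the vocabulary.
    p_lm = softmax(lm_logits)

    # Standard kNN-LM: weight each retrieved neighbor by softmax(-distance).
    w = softmax(-np.asarray(neighbor_dists, dtype=float) / temperature)

    p_knn = np.zeros(vocab_size)
    for weight, tgt, t_logits in zip(w, neighbor_targets, neighbor_teacher_logits):
        # Hypothetical PP-KD aggregation: mix the neighbor's one-hot target
        # with the teacher distribution stored for that datastore entry.
        p_knn[tgt] += weight * 0.5
        p_knn += weight * 0.5 * softmax(np.asarray(t_logits, dtype=float))

    # Usual kNN-LM interpolation between retrieval and base LM distributions.
    return lam * p_knn + (1.0 - lam) * p_lm
```

In this sketch the teacher's knowledge stays outside the student model, attached to datastore entries and consulted only at test time, which is the plug-and-play property the abstract describes; the exact way the paper combines neighbor targets and teacher logits may differ.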