@inproceedings{li-etal-2024-unveiling,
title = "Unveiling the Magic: Investigating Attention Distillation in Retrieval-Augmented Generation",
author = "Li, Zizhong and
Zhang, Haopeng and
Zhang, Jiawei",
editor = "Duh, Kevin and
Gomez, Helena and
Bethard, Steven",
booktitle = "Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 2: Short Papers)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2024.naacl-short.65/",
doi = "10.18653/v1/2024.naacl-short.65",
pages = "745--754",
    abstract = "The retrieval-augmented generation framework addresses the limitations of large language models by enabling real-time knowledge updates for more accurate answers. An efficient approach in the training phase of retrieval-augmented models is attention distillation, which uses attention scores as supervision signals instead of manually annotated query-document pairs. Despite its growing popularity, the detailed mechanisms behind the success of attention distillation remain unexplored, particularly the specific patterns it leverages to benefit training. In this paper, we address this gap by conducting a comprehensive investigation of the attention distillation workflow and identifying key factors influencing the learning performance of retrieval-augmented language models. We further propose several insightful indicators for optimizing models' training methods and avoiding ineffective training."
}
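
As context for the abstract: attention distillation trains the retriever using the reader's cross-attention over retrieved passages as a soft supervision signal, rather than manually annotated query-document pairs. The sketch below illustrates one common formulation of such a loss (a KL divergence between the retriever's passage distribution and an attention-derived target); the tensor shapes, the mean aggregation over heads and tokens, and the function name `attention_distillation_loss` are illustrative assumptions, not the paper's exact recipe.

```python
# Minimal sketch of an attention-distillation objective (assumed formulation).
import torch
import torch.nn.functional as F

def attention_distillation_loss(retriever_scores: torch.Tensor,
                                reader_cross_attention: torch.Tensor) -> torch.Tensor:
    """
    retriever_scores:        (batch, n_passages) query-passage similarity scores.
    reader_cross_attention:  (batch, n_passages, n_heads, n_tokens) cross-attention
                             weights the reader assigns to each passage's tokens.
    """
    # Aggregate attention into one relevance score per passage (assumption:
    # mean over heads and tokens), then normalize into a target distribution.
    passage_attention = reader_cross_attention.mean(dim=(2, 3))   # (batch, n_passages)
    target = F.softmax(passage_attention, dim=-1).detach()        # teacher signal, no gradient

    # Retriever's predicted passage distribution.
    log_pred = F.log_softmax(retriever_scores, dim=-1)

    # KL divergence pulls the retriever toward the reader's attention pattern.
    return F.kl_div(log_pred, target, reduction="batchmean")

# Example usage with random tensors (batch=2, 10 passages, 8 heads, 128 tokens).
scores = torch.randn(2, 10, requires_grad=True)
attn = torch.rand(2, 10, 8, 128)
loss = attention_distillation_loss(scores, attn)
loss.backward()
```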