% Findings of EMNLP 2024 paper (ACL Anthology ID 2024.findings-emnlp.144).
% The url field uses the canonical aclanthology.org address rather than a
% temporary preview-branch link; the doi field remains the preferred identifier.
@inproceedings{cao-etal-2024-head,
  title     = {Head-wise Shareable Attention for Large Language Models},
  author    = {Cao, Zouying and
               Yang, Yifei and
               Zhao, Hai},
  editor    = {Al-Onaizan, Yaser and
               Bansal, Mohit and
               Chen, Yun-Nung},
  booktitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2024},
  month     = nov,
  year      = {2024},
  address   = {Miami, Florida, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.findings-emnlp.144/},
  doi       = {10.18653/v1/2024.findings-emnlp.144},
  pages     = {2555--2571},
}
Markdown (Informal)
[Head-wise Shareable Attention for Large Language Models](https://aclanthology.org/2024.findings-emnlp.144/) (Cao et al., Findings 2024)
ACL