% Paper in the EMNLP 2024 proceedings (ACL Anthology ID 2024.emnlp-main.1178).
% The url field uses the canonical Anthology address rather than a
% preview/staging-branch link; the doi field is the preferred stable identifier.
@inproceedings{guo-etal-2024-attention,
  author    = {Guo, Zhiyu and
               Kamigaito, Hidetaka and
               Watanabe, Taro},
  title     = {Attention Score is not All You Need for Token Importance Indicator in {KV} Cache Reduction: Value Also Matters},
  editor    = {Al-Onaizan, Yaser and
               Bansal, Mohit and
               Chen, Yun-Nung},
  booktitle = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2024},
  address   = {Miami, Florida, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {21158--21166},
  doi       = {10.18653/v1/2024.emnlp-main.1178},
  url       = {https://aclanthology.org/2024.emnlp-main.1178/},
}
Markdown (Informal)
[Attention Score is not All You Need for Token Importance Indicator in KV Cache Reduction: Value Also Matters](https://aclanthology.org/2024.emnlp-main.1178/) (Guo et al., EMNLP 2024)
ACL