% Conference paper entry: ACL 2026, Volume 1 (Long Papers).
% NOTE(review): url targets the preview.aclanthology.org ingest host --
% confirm the canonical link before relying on it.
@inproceedings{wang-etal-2026-exploring-attention,
  title     = {Exploring Attention Attractors in Large Language Models},
  author    = {Wang, Ziheng and
               Yue, Zihao and
               Wang, Wenxuan and
               Jin, Qin},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-long.51/},
  pages     = {1148--1160},
  isbn      = {979-8-89176-390-6},
  abstract  = {This paper explores attention attractors, tokens that draw significantly high attention, in large language models. We analyze them from three perspectives: (1) Functionality: We demonstrate their role in aggregating information from preceding contexts to facilitate future predictions. (2) Distribution: Through layer-wise and token-wise analysis, we reveal that attention attractors are widely distributed across layers but predominantly originate from low-semantic words like ``{\_}the''. (3) Mechanism: We demonstrate the correlation between attention weights allocated to tokens with their specific activation dimension values. We hope these findings provide new insights into the attention mechanisms of large language models and inspire further exploration.},
}
Markdown (Informal)
[Exploring Attention Attractors in Large Language Models](https://preview.aclanthology.org/ingest-acl/2026.acl-long.51/) (Wang et al., ACL 2026)
ACL
- Ziheng Wang, Zihao Yue, Wenxuan Wang, and Qin Jin. 2026. Exploring Attention Attractors in Large Language Models. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 1148–1160, San Diego, California, United States. Association for Computational Linguistics.