% ACL 2026 short paper (Volume 2: Short Papers), Anthology ID 2026.acl-short.9.
% NOTE(review): url points at a preview.aclanthology.org ingest link; confirm the canonical URL once published.
@inproceedings{ran-milo-etal-2026-mechanistic,
  title     = {A Mechanistic Account of Attention Sinks in {GPT-2}: One Circuit, Broader Implications for Mitigation},
  author    = {Ran-Milo, Yuval and
               Ofek, Hila and
               Mendel, Shahar},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 2: Short Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-short.9/},
  pages     = {90--98},
  isbn      = {979-8-89176-391-3},
  abstract  = {Transformers commonly exhibit an attention sink: disproportionately high attention to the first position. We study this behavior in GPT-2{--}style models with learned query biases and absolute positional embeddings. Combining structural analysis with causal interventions, validated across natural-language, mathematical, and code inputs, we find that the sink arises from the interaction among (i) a learned query bias, (ii) the first-layer MLP transformation of the positional encoding, and (iii) structure in the key projection. Crucially, each component we identify is individually dispensable: architectures omitting each of them robustly exhibit sinks. This indicates that attention sinks may arise through distinct circuits across architectures. These findings inform mitigation of sinks, and motivate broader investigation into why sinks emerge.},
}
Markdown (Informal)
[A Mechanistic Account of Attention Sinks in GPT-2: One Circuit, Broader Implications for Mitigation](https://preview.aclanthology.org/ingest-acl/2026.acl-short.9/) (Ran-Milo et al., ACL 2026)
ACL