% ACL 2025 short paper. The url field uses the permanent ACL Anthology address,
% not a preview/staging build, so the link stays valid after publication.
@inproceedings{jerad-etal-2025-unique,
  author    = {Jerad, Selim and
               Svete, Anej and
               Li, Jiaoda and
               Cotterell, Ryan},
  title     = {Unique Hard Attention: A Tale of Two Sides},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  pages     = {977--996},
  isbn      = {979-8-89176-252-7},
  url       = {https://aclanthology.org/2025.acl-short.76/},
  abstract  = {Understanding the expressive power of transformers has recently attracted attention, as it offers insights into their abilities and limitations. Many studies analyze unique hard attention transformers, where attention selects a single position that maximizes the attention scores. When multiple positions achieve the maximum score, either the rightmost or the leftmost of those is chosen. In this paper, we highlight the importance of this seeming triviality. Recently, finite-precision transformers with both leftmost- and rightmost-hard attention were shown to be equivalent to Linear Temporal Logic (LTL). We show that this no longer holds with only leftmost-hard attention{---}in that case, they correspond to a \textit{strictly weaker} fragment of LTL. Furthermore, we show that models with leftmost-hard attention are equivalent to \textit{soft} attention, suggesting they may better approximate real-world transformers than right-attention models. These findings refine the landscape of transformer expressivity and underscore the role of attention directionality.},
}
Markdown (Informal)
[Unique Hard Attention: A Tale of Two Sides](https://aclanthology.org/2025.acl-short.76/) (Jerad et al., ACL 2025)
ACL
- Selim Jerad, Anej Svete, Jiaoda Li, and Ryan Cotterell. 2025. Unique Hard Attention: A Tale of Two Sides. In Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers), pages 977–996, Vienna, Austria. Association for Computational Linguistics.