@comment{
  Han and Ji, ACL 2025 (Volume 1: Long Papers), Anthology ID 2025.acl-long.953.
  The url field uses the permanent ACL Anthology landing page rather than the
  temporary ingestion-preview link; NOTE(review): confirm the canonical page
  resolves once the volume is published. The address field holds the
  conference venue, as exported by the Anthology.
}
@inproceedings{han-ji-2025-computation,
  title     = {Computation Mechanism Behind {LLM} Position Generalization},
  author    = {Han, Chi and
               Ji, Heng},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.acl-long.953/},
  pages     = {19408--19424},
  isbn      = {979-8-89176-251-0},
  abstract  = {Most written natural languages are composed of sequences of words and sentences. Similar to humans, large language models (LLMs) exhibit flexibility in handling textual positions - a phenomenon we term Position Generalization. They can understand texts with position perturbations and generalize to longer texts than those encountered during training with the latest techniques. These phenomena suggest that LLMs handle positions in a tolerant manner, but how LLMs computationally process positional relevance remains largely unexplored. In this work, we show how LLMs enforce certain computational mechanisms to allow for the aforementioned tolerance in position perturbations. Despite the complex design of the self-attention mechanism, in this work, LLMs are revealed to learn a counterintuitive disentanglement of attention logits, where their values show a 0.959 linear correlation with an approximation of the arithmetic sum of positional relevance and semantic importance. Furthermore, we identify a prevalent pattern in intermediate features that enables this effect, suggesting that it is a learned behavior rather than a natural result of the model architecture. Based on these findings, we provide computational explanations and criteria for the aforementioned position flexibilities observed in LLMs.},
}
Markdown (Informal)
[Computation Mechanism Behind LLM Position Generalization](https://aclanthology.org/2025.acl-long.953/) (Han & Ji, ACL 2025)
ACL