@inproceedings{li-etal-2025-training-free,
title = "A Training-Free Length Extrapolation Approach for {LLM}s: Greedy Attention Logit Interpolation",
author = "Li, Yan and
Zhang, Tianyi and
Li, Zechuan and
Han, Caren",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.emnlp-main.443/",
doi = "10.18653/v1/2025.emnlp-main.443",
pages = "8784--8804",
ISBN = "979-8-89176-332-6",
abstract = "Transformer-based Large Language Models (LLMs) struggle with inputs exceeding their training context window due to positional out-of-distribution (O.O.D.) issues that disrupt attention. Existing solutions, including fine-tuning and training-free methods, face challenges like inefficiency, redundant interpolation, logit outliers, or loss of local positional information. We propose Greedy Attention Logit Interpolation (GALI), a training-free method that improves length extrapolation by greedily reusing pretrained positional intervals and interpolating attention logits to eliminate outliers. GALI achieves stable and superior performance across a wide range of long-context tasks without requiring input-length-specific tuning. Our analysis further reveals that LLMs interpret positional intervals unevenly and that restricting interpolation to narrower ranges improves performance, even on short-context tasks. GALI represents a step toward more robust and generalizable long-text processing in LLMs."
}