@inproceedings{wang-etal-2025-lost,
title = "Lost in the Distance: Large Language Models Struggle to Capture Long-Distance Relational Knowledge",
author = "Wang, Meiyun and
Kojima, Takeshi and
Iwasawa, Yusuke and
Matsuo, Yutaka",
editor = "Chiruzzo, Luis and
Ritter, Alan and
Wang, Lu",
booktitle = "Findings of the Association for Computational Linguistics: NAACL 2025",
month = apr,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.findings-naacl.256/",
pages = "4536--4544",
ISBN = "979-8-89176-195-7",
    abstract = "Large language models (LLMs) have demonstrated impressive capabilities in handling long contexts, but challenges remain in capturing relational knowledge spread far apart within text. Connecting long-distance knowledge becomes increasingly important as the context length grows: imagine reading a lengthy detective novel where seemingly trivial information introduced early on becomes essential during the climactic reveal of the culprit. In this study, we expose the ``Lost in the Distance'' phenomenon, where LLM performance in capturing relational knowledge degrades significantly when the related elements are separated by noise, i.e., sentences unrelated to the task. Specifically, we design an experiment in which we insert artificial noise between two related elements and observe model performance as the distance between them increases. Our findings show that while LLMs can handle edge noise with little impact, their ability to reason about distant relationships declines sharply as the intervening noise grows. These findings are consistent in both forward-looking and backward-looking prediction settings. We validate this across various models (GPT-4, Gemini-1.5-pro, GPT-4o-mini, Gemini-1.5-flash, Claude-3.5-Sonnet) and tasks (causal reasoning and knowledge extraction). These results reveal a significant limitation in how LLMs process relational knowledge over long contexts. We release our code and data to support further research."
}
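
The probe described in the abstract, inserting unrelated sentences between two related facts and measuring accuracy as the gap grows, can be sketched as follows. This is a minimal illustration, not the authors' released code: the helper names (`build_prompt`, `query_model`) and the filler sentences are hypothetical stand-ins.

```python
# Minimal sketch of the noise-insertion probe described in the abstract.
# `query_model` is a hypothetical placeholder for any chat-completion API
# call; the facts and noise text below are illustrative, not the paper's data.

def build_prompt(fact_a: str, fact_b: str, question: str, n_noise: int) -> str:
    """Place two related facts n_noise unrelated sentences apart."""
    noise = ["The weather report mentioned light rain in the afternoon."] * n_noise
    context = [fact_a] + noise + [fact_b]
    return " ".join(context) + f"\nQuestion: {question}"

def query_model(prompt: str) -> str:
    # Hypothetical: swap in a real LLM API request to reproduce the setup.
    raise NotImplementedError

if __name__ == "__main__":
    fact_a = "Alice handed the sealed envelope to Bob at the station."
    fact_b = "The envelope Bob received contained the missing will."
    question = "Who gave Bob the missing will?"
    for n_noise in (0, 10, 100, 1000):  # distance grows with intervening noise
        prompt = build_prompt(fact_a, fact_b, question, n_noise)
        print(f"noise={n_noise:>4}, prompt length={len(prompt)} chars")
        # answer = query_model(prompt)  # then score accuracy vs. n_noise
```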