@inproceedings{wu-etal-2025-natural-context,
title = "Natural Context Drift Undermines the Natural Language Understanding of Large Language Models",
author = "Wu, Yulong and
Schlegel, Viktor and
Batista-Navarro, Riza",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/name-variant-enfa-fane/2025.findings-emnlp.65/",
doi = "10.18653/v1/2025.findings-emnlp.65",
pages = "1248--1259",
ISBN = "979-8-89176-335-7",
abstract = "How does the natural evolution of context paragraphs affect Question Answering (QA) in generative Large Language Models (LLMs)? To address this, we propose a framework for curating naturally evolved, human-edited variants of reading passages from contemporary QA benchmarks and for analysing LLM performance across a range of semantic similarity scores, which quantify how closely each variant aligns with Wikipedia content on the same article topic that the LLM saw during pretraining. Using this framework, we evaluate 6 QA datasets and 8 LLMs with publicly available training data. Our experiments reveal that LLM performance declines as reading passages naturally diverge from the versions encountered during pretraining{--}even when the question and all necessary information remains present at inference time. For instance, average accuracy on BoolQ drops by over 30{\%} from the highest to lowest similarity bins. This finding suggests that natural text evolution may pose a significant challenge to the language understanding capabilities of fully open-source LLMs."
}