@inproceedings{moon-lim-2026-needlechain,
title = "{N}eedle{C}hain: Measuring Intact Context Comprehension Capability of Large Language Models",
author = "Moon, Hyeonseok and
Lim, Heuiseok",
editor = "Liakata, Maria and
Moreira, Viviane P. and
Zhang, Jiajun and
Jurgens, David",
booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1637/",
pages = "32718--32730",
ISBN = "979-8-89176-395-1",
abstract = "Recent reports suggest that LLMs can handle increasingly long contexts. However, many existing benchmarks for context understanding embed substantial query-irrelevant content, which shifts evaluation toward retrieving relevant snippets rather than fully integrating all provided information. Under this setting, we view that current benchmarks can overestimate true context-understanding ability of LLMs. In particular, we demonstrate that when the context consists entirely of query-relevant text, even advanced models such as GPT-4o fail to reliably integrate inputs as short as 200 tokens. To evaluate this capability more rigorously, we introduce NeedleChain, a benchmark designed to test whether models can faithfully incorporate all given evidence. NeedleChain includes three variants that differ in the required order of comprehension, along with a parallel benchmark based on the needle-in-a-haystack(NIAH) paradigm. By comparing these variants, NeedleChain enables a more comprehensive assessment of context understanding. We further propose a training-free strategy that encourages models to reflect all available information, ROPE contraction, highlighting the importance of full-context integration and pointing to new directions for improving reliable reasoning over context."
}Markdown (Informal)
[NeedleChain: Measuring Intact Context Comprehension Capability of Large Language Models](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1637/) (Moon & Lim, Findings 2026)
ACL