% Conference paper: Sun et al., "Stop When Enough", ACL 2026 (Volume 1: Long Papers).
% NOTE(review): the `url` below points at a preview.aclanthology.org ingest branch;
% presumably it should be replaced by the canonical Anthology URL once the volume
% is published -- confirm before citing.
% NOTE(review): `address` appears to hold the conference location rather than the
% publisher's city -- verify against the target bibliography style.
@inproceedings{sun-etal-2026-stop,
    title     = {Stop When Enough: Adaptive Early-Stopping for Chain-of-Thought Reasoning},
    author    = {Sun, Renliang and
                 Cheng, Wei and
                 Li, Dawei and
                 Chen, Haifeng and
                 Wang, Wei},
    editor    = {Liakata, Maria and
                 Moreira, Viviane P. and
                 Zhang, Jiajun and
                 Jurgens, David},
    booktitle = {Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)},
    month     = jul,
    year      = {2026},
    address   = {San Diego, California, United States},
    publisher = {Association for Computational Linguistics},
    url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-long.1256/},
    pages     = {27250--27268},
    isbn      = {979-8-89176-390-6},
    abstract  = {Chain-of-Thought (CoT) reasoning has driven recent gains of large language models (LLMs) on reasoning-intensive tasks by externalizing intermediate steps. However, excessive or redundant reasoning {---} so-called overthinking {---} can increase inference costs and lead LLMs toward incorrect conclusions. In this paper, we present \textbf{REFRAIN} (\underline{REF}lective-\underline{R}edundancy for \underline{A}daptive \underline{IN}ference), a training-free framework that adaptively determines when to stop reasoning to mitigate overthinking. REFRAIN integrates a two-stage stop discriminator to identify reflective yet redundant reasoning and a sliding-window Upper Confidence Bound (SW-UCB) multi-armed bandit controller to dynamically adjust stopping thresholds according to problem difficulty without supervision or fine-tuning. Across four representative benchmarks and two model families, REFRAIN reduces token usage by 20--55{\%} while maintaining or improving accuracy compared to standard CoT prompting. Extensive ablation and robustness analyses demonstrate its stability across models, scorers, and prompt variations. In summary, our findings highlight when-to-stop as a new and practical axis of test-time scaling {---} enabling models to reason not just more, but just enough.}
}

Markdown (Informal)
[Stop When Enough: Adaptive Early-Stopping for Chain-of-Thought Reasoning](https://preview.aclanthology.org/ingest-acl/2026.acl-long.1256/) (Sun et al., ACL 2026)
ACL