% Findings of ACL 2026 paper: overthinking in LLM test-time compute scaling.
% NOTE(review): url was normalised from the temporary
% preview.aclanthology.org/ingest-acl staging path to the canonical
% Anthology path -- confirm it resolves once the volume is published.
% The address field holds the conference venue, as exported by the Anthology.
@inproceedings{zhou-etal-2026-thinking,
  title     = {When More Thinking Hurts: Overthinking in {LLM} Test-Time Compute Scaling},
  author    = {Zhou, Shu and
               Ling, Rui and
               Chen, Junan and
               Wang, Xin and
               Fan, Tao and
               Wang, Hao},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.findings-acl.1199/},
  pages     = {23967--23977},
  isbn      = {979-8-89176-395-1},
  abstract  = {Scaling test-time compute through extended chains of thought has become a dominant paradigm for improving large language model reasoning. However, existing research implicitly assumes that longer thinking always yields better results. This assumption remains largely unexamined. We systematically investigate how the marginal utility of additional reasoning tokens changes as compute budgets increase. We find that marginal returns diminish substantially at higher budgets and that models exhibit overthinking, where extended reasoning is associated with abandoning previously correct answers. Furthermore, we show that optimal thinking length varies across problem difficulty, suggesting that uniform compute allocation is suboptimal. Our cost-aware evaluation framework reveals that stopping at moderate budgets can reduce computation significantly while maintaining comparable accuracy.},
}
Markdown (Informal)
[When More Thinking Hurts: Overthinking in LLM Test-Time Compute Scaling](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1199/) (Zhou et al., Findings 2026)
ACL