@inproceedings{liu-wang-2025-answer,
title = "Answer Convergence as a Signal for Early Stopping in Reasoning",
author = "Liu, Xin and
Wang, Lu",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingest-emnlp/2025.emnlp-main.904/",
pages = "17907--17918",
ISBN = "979-8-89176-332-6",
abstract = "Chain-of-thought (CoT) prompting enhances reasoning in large language models (LLMs) but often leads to verbose and redundant outputs, thus increasing inference cost. We hypothesize that many reasoning steps are unnecessary for producing correct answers. To investigate this, we start with a systematic study to investigate what is the minimum reasoning required for a model to reach a stable decision. Based on the insights, we propose three inference-time strategies to improve efficiency: (1) early stopping via answer consistency, (2) boosting the probability of generating end-of-reasoning signals, and (3) a supervised method that learns when to stop based on internal activations. Experiments across five benchmarks and five open-weights LLMs show that our methods largely reduce token usage with little or no accuracy drop. In particular, on NaturalQuestions, Answer Consistency reduces tokens by over 40{\%} while further improving accuracy. Our work underscores the importance of cost-effective reasoning methods that operate at inference time, offering practical benefits for real-world applications."
}