@inproceedings{zhang-etal-2025-adaptthink,
title = "{A}dapt{T}hink: Reasoning Models Can Learn When to Think",
author = "Zhang, Jiajie and
Lin, Nianyi and
Hou, Lei and
Feng, Ling and
Li, Juanzi",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingest-emnlp/2025.emnlp-main.184/",
pages = "3716--3730",
ISBN = "979-8-89176-332-6",
abstract = "Recently, large reasoning models have achieved impressive performance on various tasks by employing human-like deep thinking. However, the lengthy thinking process substantially increases inference overhead, making efficiency a critical bottleneck. In this work, we first demonstrate that \textit{NoThinking}, which prompts the reasoning model to skip thinking and directly generate the final solution, is a better choice for relatively simple tasks in terms of both performance and efficiency. Motivated by this, we propose \textit{AdaptThink}, a novel RL algorithm to teach reasoning models to choose the optimal thinking mode adaptively based on problem difficulty. Specifically, \textit{AdaptThink} features two core components: (1) a constrained optimization objective that encourages the model to choose \textit{NoThinking} while maintaining the overall performance; (2) an importance sampling strategy that balances \textit{Thinking} and \textit{NoThinking} samples during on-policy training, thereby enabling cold start and allowing the model to explore and exploit both thinking modes throughout the training process. Our experiments indicate that \textit{AdaptThink} significantly reduces the inference costs while further enhancing performance. Notably, on three math datasets, \textit{AdaptThink} reduces the average response length of DeepSeek-R1-Distill-Qwen-1.5B by 53{\%} and improves its accuracy by 2.4{\%}, highlighting the promise of adaptive thinking-mode selection for optimizing the balance between reasoning quality and efficiency."
}