% Conference paper record (ACL 2026, Volume 1: Long Papers), exported from the ACL Anthology.
% NOTE(review): the url field points at a "preview.aclanthology.org/ingest-acl" host, which
% looks like a staging build rather than the permanent address -- confirm the canonical
% Anthology URL (and the DOI, if one has been assigned) before relying on this entry.
@inproceedings{li-etal-2026-think-better,
    title     = {Think Better, Not Longer: Token-Level Marginal Utility for Efficient Reasoning in Large Reasoning Models},
    author    = {Li, Jiawei and
                 Gao, Yang and
                 Sun, Huashan and
                 Feng, Chong},
    editor    = {Liakata, Maria and
                 Moreira, Viviane P. and
                 Zhang, Jiajun and
                 Jurgens, David},
    booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
    month     = jul,
    year      = {2026},
    address   = {San Diego, California, United States},
    publisher = {Association for Computational Linguistics},
    url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-long.1386/},
    pages     = {30052--30063},
    isbn      = {979-8-89176-390-6},
    abstract  = {While Large Reasoning Models (LRMs) have demonstrated remarkable capabilities through explicit Chain-of-Thought (CoT) generation, they frequently suffer from ``overthinking''. In this work, we bridge this gap by introducing \textbf{Token-level Marginal Utility}, which quantifies the per-token log-probability gain of the ground-truth answer. Leveraging this dense supervision signal, we propose \textbf{MUTO} (\textbf{M}arginal \textbf{U}tility Guided \textbf{T}hinking \textbf{O}ptimization), a unified training framework designed to synthesize concise reasoning chains. Rather than relying only on coarse trajectory-level length control, MUTO identifies tokens that reduce the model{'}s likelihood of the correct answer and penalizes such negative-utility reasoning, yielding concise yet effective CoT trajectories. Experiments on DeepSeek-R1-Distill-Qwen backbones (1.5B and 7B) across six math reasoning benchmarks show that MUTO yields a markedly better efficiency-accuracy Pareto frontier. It reduces average token usage by 87.1{\%} at 1.5B while improving accuracy by 2.3{\%}, and cuts tokens by 80.2{\%} at 7B with only -0.1{\%} accuracy change, achieving the best length-normalized accuracy among baselines.},
}

Markdown (Informal)
[Think Better, Not Longer: Token-Level Marginal Utility for Efficient Reasoning in Large Reasoning Models](https://preview.aclanthology.org/ingest-acl/2026.acl-long.1386/) (Li et al., ACL 2026)
ACL