% Findings of ACL 2026 paper on shortening reasoning traces via distillation.
% NOTE(review): url points at the Anthology preview/ingest host; confirm whether a canonical URL should replace it.
@inproceedings{heddaya-etal-2026-internalization,
  title     = {When Internalization Fails: Finding Better Targets for Reasoning Compression},
  author    = {Heddaya, Mourad and
               Roberts, Manley and
               Wadhawan, Rohan and
               Tan, Chenhao},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.734/},
  pages     = {14935--14946},
  isbn      = {979-8-89176-395-1},
  abstract  = {Reasoning language models generate long reasoning traces that increase latency and cost. We study how to shorten these traces while preserving accuracy on competition-level mathematics. In a teacher-student distillation setup, we compare three approaches: (i) inference-time truncation after the first $k$ tokens, (ii) Implicit Chain-of-Thought (ICoT)-style curricula that progressively shorten the teacher trace during training, and (iii) direct distillation to shorter reasoning traces. Using NuminaMath 1.5 with traces from DeepSeek-R1 and QwQ-32B, we distill into Qwen2.5-7B and measure accuracy against total tokens generated. We find: (1) with standard SFT and first-$k$ truncation, models compensate by generating longer text after reasoning, undermining token savings; (2) ICoT-style curricula provide little benefit on competition-level mathematics, where reasoning traces are long and diverse; and (3) training on post-think, text the teacher generates after reasoning, achieves the best accuracy{--}efficiency trade-off among all shortened targets, outperforming generic summaries at matched token budgets. These results show that curriculum-based internalization methods effective on simple tasks do not transfer to complex reasoning, and that post-think provides a better distillation target.},
}
Markdown (Informal)
[When Internalization Fails: Finding Better Targets for Reasoning Compression](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.734/) (Heddaya et al., Findings 2026)
ACL