@comment{
  Conference paper: DRP (Distilled Reasoning Pruning), Findings of ACL 2026.
  The address field holds the location exactly as exported with this record.
  NOTE(review): the url below points at the preview.aclanthology.org ingest
  host, presumably a staging location; confirm the canonical URL (and a DOI,
  if one has been assigned) before relying on this record.
}
@inproceedings{jiang-etal-2026-drp,
  title     = {{DRP}: Distilled Reasoning Pruning with Mathematical Skill-aware Step Decomposition for Efficient Large Reasoning Models},
  author    = {Jiang, Yuxuan and
               Li, Dawei and
               Ferraro, Francis},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.196/},
  pages     = {4020--4039},
  isbn      = {979-8-89176-395-1},
  abstract  = {While Large Reasoning Models (LRMs) excel at complex tasks via long Chain-of-Thought (CoT) reasoning, their outputs are often excessively verbose, leading to inefficiency. This problem is amplified when the student's long-form reasoning mismatches the concise outputs of smaller teacher models---common in LLM distillation to avoid using costly large teachers. To address this issue, we propose Distilled Reasoning Pruning (DRP), a hybrid framework that combines inference-time pruning with tuning-based distillation. DRP leverages a teacher model to perform mathematical problem-solving skill-aware step decomposition and pruning, then distills the refined reasoning paths into a student model, enabling efficient and accurate reasoning. Across challenging math datasets, DRP significantly reduces token usage without sacrificing accuracy---for instance, cutting tokens on GSM8K from 917 to 328 while improving accuracy from 91.7\% to 94.1\%, and reducing AIME tokens by 43\% with no performance drop. Further analysis shows that aligning training CoT structure with the student's capacity is key to effective knowledge transfer.},
}
Markdown (Informal)
[DRP: Distilled Reasoning Pruning with Mathematical Skill-aware Step Decomposition for Efficient Large Reasoning Models](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.196/) (Jiang et al., Findings 2026)
ACL