@comment{
  Findings of ACL 2026 paper introducing DeepPrune.
  NOTE(review): the url field below points at a preview.aclanthology.org
  ingest build; presumably the permanent address will live on the main
  aclanthology.org host -- confirm and update once the volume is published.
  Text that follows the entry is outside any record and is ignored by BibTeX.
}
@inproceedings{tu-etal-2026-deepprune,
  title     = {{DeepPrune}: Parallel Scaling without Inter-trace Redundancy},
  author    = {Tu, Shangqing and
               Li, Yaxuan and
               Bai, Yushi and
               Hou, Lei and
               Li, Juanzi},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.656/},
  pages     = {13389--13403},
  isbn      = {979-8-89176-395-1},
  abstract  = {Parallel scaling has emerged as a powerful paradigm to enhance reasoning capabilities in large language models (LLMs) by generating multiple Chain-of-Thought (CoT) traces simultaneously. However, this approach introduces significant computational inefficiency due to \textit{inter-trace redundancy}{---}our analysis reveals that over 80{\%} of parallel reasoning traces yield identical final answers, representing substantial wasted computation. To address this critical efficiency bottleneck, we propose \textbf{DeepPrune}, a novel framework that enables efficient parallel scaling through dynamic pruning. Our method features a specialized judge model trained with oversampling techniques to accurately predict answer equivalence from partial reasoning traces, achieving 0.7072 AUROC on equivalence prediction across unseen reasoning models. This is combined with an online greedy clustering algorithm that dynamically prunes redundant paths while preserving answer diversity. Comprehensive evaluations across three challenging benchmarks (AIME 2024, AIME 2025, and GPQA) and multiple reasoning models demonstrate that DeepPrune achieves remarkable token reduction ranging from 65.73{\%} to 88.50{\%} compared to conventional consensus sampling, while maintaining competitive accuracy within 3.4 percentage points. Our work establishes a new standard for efficient parallel reasoning, making high-performance reasoning more efficient. Our code and data are here: https://github.com/THU-KEG/DeepPrune/},
}
Markdown (Informal)
[DeepPrune: Parallel Scaling without Inter-trace Redundancy](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.656/) (Tu et al., Findings 2026)
ACL
- Shangqing Tu, Yaxuan Li, Yushi Bai, Lei Hou, and Juanzi Li. 2026. DeepPrune: Parallel Scaling without Inter-trace Redundancy. In Findings of the Association for Computational Linguistics: ACL 2026, pages 13389–13403, San Diego, California, United States. Association for Computational Linguistics.