@inproceedings{nguyen-etal-2025-structured,
    title = "Structured Pruning for Diverse Best-of-{$N$} Reasoning Optimization",
    author = "Nguyen, Hieu Trung and
      Nguyen, Bao and
      Nguyen, Viet Anh",
    editor = "Che, Wanxiang and
      Nabende, Joyce and
      Shutova, Ekaterina and
      Pilehvar, Mohammad Taher",
    booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
    month = jul,
    year = "2025",
    address = "Vienna, Austria",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.findings-acl.1225/",
    pages = "23911--23922",
    isbn = "979-8-89176-256-5",
    abstract = "Model pruning in transformer-based language models, traditionally seen as a means of computational savings, can enhance the model{'}s reasoning capabilities. In this work, we uncover the surprising phenomenon that the selective pruning of certain attention heads leads to improvements in reasoning performance, particularly on challenging tasks. Motivated by this observation, we propose SPRINT, a novel contrastive learning framework that dynamically selects the optimal head and layer to prune during inference. By aligning question embeddings with head embeddings, our approach identifies those pruned-head configurations that result in more accurate reasoning. Extensive experiments on the MATH dataset demonstrate that our method significantly outperforms traditional best-of-$N$ and random head selection strategies on the MATH500 and GSM8K datasets."
}
Markdown (Informal)
[Structured Pruning for Diverse Best-of-N Reasoning Optimization](https://aclanthology.org/2025.findings-acl.1225/) (Nguyen et al., Findings 2025)
ACL