@inproceedings{gao-etal-2025-bypass,
title = "Bypass Back-propagation: Optimization-based Structural Pruning for Large Language Models via Policy Gradient",
author = "Gao, Yuan and
Liu, Zujing and
Zhang, Weizhong and
Du, Bo and
Xia, Gui-Song",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingestion-acl-25/2025.acl-long.1421/",
pages = "29356--29377",
ISBN = "979-8-89176-251-0",
abstract = "Recent Large-Language Models (LLMs) pruning methods typically operate at the post-training phase without the expensive weight finetuning, however, their pruning criteria often rely on **heuristically hand-crafted metrics**, potentially leading to suboptimal performance. We instead propose a novel **optimization-based structural pruning** that learns the pruning masks in a probabilistic space directly by optimizing the loss of the pruned model. To preserve the efficiency, our method **eliminates the back-propagation** through the LLM *per se* during the optimization, requiring only **the forward pass of the LLM**. We achieve this by learning an underlying Bernoulli distribution to sample binary pruning masks, where we decouple the Bernoulli parameters from the LLM loss, thus facilitating an efficient optimization via *policy gradient estimator* without back-propagation. As a result, our method is able to 1) *support global and heterogeneous pruning* (*i.e.*, our method automatically determines different redundancy for different layers), and 2) *optionally initialize with a metric-based method* (for our Bernoulli distributions). Extensive experiments conducted on LLaMA, LLaMA-2, LLaMA-3, Vicuna, and Mistral models using the C4 and WikiText2 datasets demonstrate the promising performance of our method in efficiency and effectiveness."
}
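
The following is a minimal, self-contained sketch of the core idea summarized in the abstract: learning Bernoulli pruning masks with a REINFORCE-style policy gradient so that only forward passes of the model are needed. It is not the authors' implementation; the group granularity, the toy `model_loss` surrogate, and all hyperparameters are illustrative assumptions standing in for the pruned LLM's calibration loss.

```python
import torch

num_groups = 1024  # structural units to prune, e.g. heads/FFN channels (assumption)

# Toy surrogate for the pruned LLM's loss on calibration data (assumption):
# dropping "important" groups is penalized, keeping too many groups is penalized.
target_keep = (torch.arange(num_groups) % 2 == 0).float()

def model_loss(masks: torch.Tensor) -> torch.Tensor:
    # In the real setting this would be a forward pass of the pruned LLM.
    return ((1.0 - masks) * target_keep).sum() + 0.1 * masks.sum()

logits = torch.zeros(num_groups)   # Bernoulli logits: the only learned parameters
lr, num_samples = 5e-2, 4
baseline = 0.0                     # running baseline to reduce estimator variance

for step in range(200):
    probs = torch.sigmoid(logits)
    grad = torch.zeros_like(logits)
    for _ in range(num_samples):
        masks = torch.bernoulli(probs)      # sample binary pruning masks
        with torch.no_grad():               # forward pass only, no back-propagation
            loss = model_loss(masks).item()
        # REINFORCE: d/d(theta) log p(m) = m - sigmoid(theta) for Bernoulli logits
        grad += (loss - baseline) * (masks - probs) / num_samples
        baseline = 0.9 * baseline + 0.1 * loss
    logits -= lr * grad                      # gradient step on the Bernoulli logits
```

Because the loss enters the update only as a scalar weight on the score function, the model itself never needs to be differentiated, which is what allows the optimization to proceed with forward passes alone.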