@inproceedings{wang-etal-2024-pruning,
title = "Pruning before Fine-tuning: A Retraining-free Compression Framework for Pre-trained Language Models",
author = "Wang, Pingjie and
Liu, Hongcheng and
Wang, Yanfeng and
Wang, Yu",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://preview.aclanthology.org/corrections-2025-06/2024.lrec-main.1162/",
pages = "13279--13289",
abstract = "Structured pruning is an effective technique for compressing pre-trained language models (PLMs), reducing model size and improving inference speed for efficient deployment. However, most of existing pruning algorithms require retraining, leading to additional computational overhead. While some retraining-free approaches have been proposed for classification tasks, they still require a fully fine-tuned model for the task, and may cause catastrophic performance degradation on generative tasks. To address these challenges, we propose P-pruning (pre-pruning), an innovative task-specific compression framework. P-pruning prunes redundant modules of PLMs before fine-tuning, reducing the costs associated with fine-tuning. We also introduce a pruning algorithm for this framework, which includes two techniques: (1) module clustering, which clusters the outputs of all heads and neurons based on the task input; and (2) centroid selection, which identifies the most salient element in each cluster and prunes the others. We apply our method to BERT and GPT-2 and evaluate its effectiveness on GLUE, SQuAD, WikiText-2, WikiText-103, and PTB datasets. Experimental results demonstrate that our approach achieves higher performance in both classification and generative tasks, while also reducing the time required for fine-tuning."
}
Markdown (Informal)
[Pruning before Fine-tuning: A Retraining-free Compression Framework for Pre-trained Language Models](https://preview.aclanthology.org/corrections-2025-06/2024.lrec-main.1162/) (Wang et al., LREC-COLING 2024)
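
To make the abstract's two-step pruning idea concrete, here is a minimal sketch of "cluster module outputs, then keep one representative per cluster." It is an illustration only, not the authors' implementation: the use of k-means, the per-head averaged output representations, the distance-to-centroid saliency proxy, and the name select_heads_to_keep are all assumptions; the paper defines its own clustering and saliency criteria.

# Hypothetical sketch of the clustering-plus-centroid-selection idea
# described in the abstract; not the authors' code.
import numpy as np
from sklearn.cluster import KMeans

def select_heads_to_keep(head_outputs, n_keep):
    """head_outputs: (num_heads, feature_dim) array, where each row is a
    head's output representation averaged over a sample of task inputs.
    Returns the indices of the n_keep heads to retain; all others are pruned."""
    km = KMeans(n_clusters=n_keep, n_init=10).fit(head_outputs)
    keep = []
    for c in range(n_keep):
        members = np.where(km.labels_ == c)[0]
        # Centroid selection: keep the member closest to its cluster
        # centroid, treated here as the cluster's most salient element.
        dists = np.linalg.norm(head_outputs[members] - km.cluster_centers_[c], axis=1)
        keep.append(int(members[np.argmin(dists)]))
    return sorted(keep)

# Example: 12 attention heads with 64-dim averaged outputs; keep 4.
rng = np.random.default_rng(0)
print(select_heads_to_keep(rng.normal(size=(12, 64)), n_keep=4))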