@inproceedings{williams-etal-2025-self,
title = "Self-calibration for Language Model Quantization and Pruning",
author = "Williams, Miles and
Chrysostomou, George and
Aletras, Nikolaos",
editor = "Chiruzzo, Luis and
Ritter, Alan and
Wang, Lu",
booktitle = "Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)",
month = apr,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/corrections-2025-06/2025.naacl-long.509/",
doi = "10.18653/v1/2025.naacl-long.509",
pages = "10149--10167",
ISBN = "979-8-89176-189-6",
abstract = "Quantization and pruning are fundamental approaches for model compression, enabling efficient inference for language models. In a post-training setting, state-of-the-art quantization and pruning methods require calibration data, a small set of unlabeled examples. Conventionally, this is randomly sampled web text, aiming to reflect the model training data. However, this poses two key problems: (1) unrepresentative calibration examples can harm model performance, and (2) organizations increasingly avoid releasing model training data. In this paper, we propose self-calibration as a solution. Our approach requires no external data, instead leveraging the model itself to generate synthetic calibration data, with a view to better approximating the pre-training data distribution. We extensively compare the performance of self-calibration with several baselines, across a variety of models, compression methods, and tasks. Our approach proves consistently competitive in maximizing downstream task performance, frequently outperforming even using real data."
}