@inproceedings{huynh-etal-2026-distribution,
title = "Distribution-aware Low-bitwidth Quantization for Large Language Models",
author = "Huynh, Bao Tan Duy and
Tsunakawa, Takashi and
Nishida, Masafumi",
editor = "Piperidis, Stelios and
Bel, N{\'u}ria and
van den Heuvel, Henk and
Ide, Nancy and
Krek, Simon and
Toral, Antonio",
journal = "International Conference on Language Resources and Evaluation",
volume = "main",
month = may,
year = "2026",
address = "Palma de Mallorca, Spain",
publisher = "ELRA Language Resource Association",
url = "https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.789/",
pages = "10057--10070",
abstract = "The increasing scale and complexity of large language models (LLMs) present significant computational and memory challenges, limiting their widespread deployment. Post-training quantization (PTQ) has emerged as a key technique for mitigating these challenges without costly retraining. However, compressing models to ultra-low bitwidths (e.g., 2-3 bits) while maintaining accuracy remains a major challenge. In this study, we present a comprehensive PTQ framework that addresses this problem by compressing LLM weights through three core innovations: (1) a calibration process guided by Kullback-Leibler divergence minimization to preserve the original weight distribution, (2) a learnable codebook optimization mechanism employing noise substitution for vector quantization to enable robust gradient estimation, and (3) a layer-grouping strategy based on statistical distribution similarity to improve parameter efficiency. Experimental evaluations on large-scale models show that the proposed framework achieves competitive performance compared with state-of-the-art quantization techniques. Importantly, these results are obtained without any post-quantization fine-tuning, highlighting the efficiency and practical applicability of our approach for deploying highly compressed LLMs."
}