@comment{Liu, Koneru and Niehues, Diet-KIT, IWSLT 2026 proceedings paper. NOTE(review): the url field points at a preview.aclanthology.org corrections branch; confirm whether the canonical Anthology URL should replace it.}
@inproceedings{liu-etal-2026-diet,
  title     = {Diet-{KIT}: Post-Training Quantization for Speech {LLMs}},
  author    = {Liu, Danni and
               Koneru, Sai and
               Niehues, Jan},
  editor    = {Salesky, Elizabeth and
               Anastasopoulos, Antonios and
               Negri, Matteo and
               Federico, Marcello},
  booktitle = {Proceedings of the 23rd International Conference on Spoken Language Translation ({IWSLT} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, USA (in-person and online)},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/corrections-2026-06/2026.iwslt-1.21/},
  doi       = {10.18653/v1/2026.iwslt-1.21},
  pages     = {189--196},
  isbn      = {979-8-89176-411-8},
  abstract  = {We present Diet-KIT, a system for the IWSLT speech translation compression task under a strict 4 GB on-disk storage constraint, starting from the 16 GB Qwen2-Audio-7B base model. Compression is achieved with a sequential pipeline based on Half-Quadratic Quantization (HQQ). Based on systematic ablations, we find that 4-bit quantization preserves translation quality well, whereas 3-bit quantization induces a sharp performance cliff, precluding aggressive compression across the whole model. We further show that the embedding table tolerates 2-bit quantization with negligible loss, while the LM head requires higher precision. To satisfy the storage constraint, we propose a sensitivity-guided layer selection method that identifies MLP sublayers tolerant to 3-bit compression via a per-layer sensitivity analysis, which consistently outperforms manual and random layer selection. Finally, AWQ calibration is applied as a data-driven refinement stage. The final system achieves 3.98 GB on disk with COMET scores of 74.4 on en{\textrightarrow}de and 77.1 on en{\textrightarrow}zh, compared to 75.6 and 79.5 for the uncompressed fine-tuned model.},
}
Markdown (Informal)
[Diet-KIT: Post-Training Quantization for Speech LLMs](https://preview.aclanthology.org/corrections-2026-06/2026.iwslt-1.21/) (Liu et al., IWSLT 2026)
ACL
- Danni Liu, Sai Koneru, and Jan Niehues. 2026. Diet-KIT: Post-Training Quantization for Speech LLMs. In Proceedings of the 23rd International Conference on Spoken Language Translation (IWSLT 2026), pages 189–196, San Diego, USA (in-person and online). Association for Computational Linguistics.