@inproceedings{el-kurdi-etal-2022-zero,
  title     = {Zero-Shot Dynamic Quantization for {Transformer} Inference},
  author    = {El-kurdi, Yousef and
               Quinn, Jerry and
               Sil, Avi},
  editor    = {Li, Yunyao and
               Lazaridou, Angeliki},
  booktitle = {Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: Industry Track},
  month     = dec,
  year      = {2022},
  address   = {Abu Dhabi, UAE},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.emnlp-industry.45/},
  doi       = {10.18653/v1/2022.emnlp-industry.45},
  pages     = {451--457},
  abstract  = {We introduce a novel run-time method for significantly reducing the accuracy loss associated with quantizing BERT-like models to 8-bit integers. Existing methods for quantizing models either modify the training procedure, or they require an additional calibration step to adjust parameters that also requires a selected held-out dataset. Our method permits taking advantage of quantization without the need for these adjustments. We present results on several NLP tasks demonstrating the usefulness of this technique.},
}
Markdown (Informal)
[Zero-Shot Dynamic Quantization for Transformer Inference](https://aclanthology.org/2022.emnlp-industry.45/) (El-kurdi et al., EMNLP 2022)
ACL
- Yousef El-kurdi, Jerry Quinn, and Avi Sil. 2022. Zero-Shot Dynamic Quantization for Transformer Inference. In Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: Industry Track, pages 451–457, Abu Dhabi, UAE. Association for Computational Linguistics.