% Conference paper (NAACL-HLT 2018, industry track), exported from the ACL Anthology.
% The url field uses the canonical Anthology landing page rather than a preview-branch link.
@inproceedings{quinn-ballesteros-2018-pieces,
  title     = {Pieces of Eight: 8-bit Neural Machine Translation},
  author    = {Quinn, Jerry and
               Ballesteros, Miguel},
  editor    = {Bangalore, Srinivas and
               Chu-Carroll, Jennifer and
               Li, Yunyao},
  booktitle = {Proceedings of the 2018 Conference of the North {American} Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 3 (Industry Papers)},
  month     = jun,
  year      = {2018},
  address   = {New Orleans - Louisiana},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/N18-3014/},
  doi       = {10.18653/v1/N18-3014},
  pages     = {114--120},
  abstract  = {Neural machine translation has achieved levels of fluency and adequacy that would have been surprising a short time ago. Output quality is extremely relevant for industry purposes, however it is equally important to produce results in the shortest time possible, mainly for latency-sensitive applications and to control cloud hosting costs. In this paper we show the effectiveness of translating with 8-bit quantization for models that have been trained using 32-bit floating point values. Results show that 8-bit translation makes a non-negligible impact in terms of speed with no degradation in accuracy and adequacy.},
}
Markdown (Informal)
[Pieces of Eight: 8-bit Neural Machine Translation](https://aclanthology.org/N18-3014/) (Quinn & Ballesteros, NAACL 2018)
ACL
- Jerry Quinn and Miguel Ballesteros. 2018. Pieces of Eight: 8-bit Neural Machine Translation. In Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 3 (Industry Papers), pages 114–120, New Orleans - Louisiana. Association for Computational Linguistics.