@inproceedings{libovicky-fraser-2020-towards,
title = "Towards Reasonably-Sized Character-Level Transformer {NMT} by Finetuning Subword Systems",
author = "Libovick{\'y}, Jind{\v{r}}ich and
Fraser, Alexander",
editor = "Webber, Bonnie and
Cohn, Trevor and
He, Yulan and
Liu, Yang",
booktitle = "Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2020.emnlp-main.203/",
doi = "10.18653/v1/2020.emnlp-main.203",
pages = "2572--2579",
abstract = "Applying the Transformer architecture on the character level usually requires very deep architectures that are difficult and slow to train. These problems can be partially overcome by incorporating a segmentation into tokens in the model. We show that by initially training a subword model and then finetuning it on characters, we can obtain a neural machine translation model that works at the character level without requiring token segmentation. We use only the vanilla 6-layer Transformer Base architecture. Our character-level models better capture morphological phenomena and show more robustness to noise at the expense of somewhat worse overall translation quality. Our study is a significant step towards high-performance and easy to train character-based models that are not extremely large."
}