@inproceedings{carrion-casacuberta-2022-effectiveness,
title = "On the Effectiveness of Quasi Character-Level Models for Machine Translation",
author = "Carri{\'o}n, Salvador and
Casacuberta, Francisco",
editor = "Duh, Kevin and
Guzm{\'a}n, Francisco",
booktitle = "Proceedings of the 15th biennial conference of the Association for Machine Translation in the Americas (Volume 1: Research Track)",
month = sep,
year = "2022",
address = "Orlando, USA",
publisher = "Association for Machine Translation in the Americas",
url = "https://preview.aclanthology.org/fix-sig-urls/2022.amta-research.10/",
pages = "131--143",
abstract = "Neural Machine Translation (NMT) models often use subword-level vocabularies to deal with rare or unknown words. Although some studies have shown the effectiveness of purely character-based models, these approaches have resulted in highly expensive models in computational terms. In this work, we explore the benefits of quasi-character-level models for very low-resource languages and their ability to mitigate the effects of the catastrophic forgetting problem. First, we conduct an empirical study on the efficacy of these models, as a function of the vocabulary and training set size, for a range of languages, domains, and architectures. Next, we study the ability of these models to mitigate the effects of catastrophic forgetting in machine translation. Our work suggests that quasi-character-level models have practically the same generalization capabilities as character-based models but at lower computational costs. Furthermore, they appear to help achieve greater consistency between domains than standard subword-level models, although the catastrophic forgetting problem is not mitigated."
}
Markdown (Informal)
[On the Effectiveness of Quasi Character-Level Models for Machine Translation](https://aclanthology.org/2022.amta-research.10/) (Carrión & Casacuberta, AMTA 2022)