% ACL Anthology record 2020.lt4hala-1.14 (LT4HALA 2020 workshop paper).
% The url field uses the canonical Anthology address, not a preview deployment.
% Proper nouns and acronyms are brace-protected as whole words so that
% sentence-casing bibliography styles keep their capitalisation.
@inproceedings{martinez-garcia-garcia-tejedor-2020-latin,
  author    = {Mart{\'i}nez Garcia, Eva and
               Garc{\'i}a Tejedor, {\'A}lvaro},
  title     = {{Latin-Spanish} Neural Machine Translation: from the {Bible} to {Saint Augustine}},
  editor    = {Sprugnoli, Rachele and
               Passarotti, Marco},
  booktitle = {Proceedings of {LT4HALA} 2020 - 1st Workshop on Language Technologies for Historical and Ancient Languages},
  month     = may,
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association (ELRA)},
  pages     = {94--99},
  url       = {https://aclanthology.org/2020.lt4hala-1.14/},
  isbn      = {979-10-95546-53-5},
  language  = {eng},
  abstract  = {Although there are several sources where to find historical texts, they usually are available in the original language that makes them generally inaccessible. This paper presents the development of state-of-the-art Neural Machine Systems for the low-resourced Latin-Spanish language pair. First, we build a Transformer-based Machine Translation system on the Bible parallel corpus. Then, we build a comparable corpus from Saint Augustine texts and their translations. We use this corpus to study the domain adaptation case from the Bible texts to Saint Augustine{'}s works. Results show the difficulties of handling a low-resourced language as Latin. First, we noticed the importance of having enough data, since the systems do not achieve high BLEU scores. Regarding domain adaptation, results show how using in-domain data helps systems to achieve a better quality translation. Also, we observed that it is needed a higher amount of data to perform an effective vocabulary extension that includes in-domain vocabulary.},
}
Markdown (Informal)
[Latin-Spanish Neural Machine Translation: from the Bible to Saint Augustine](https://aclanthology.org/2020.lt4hala-1.14/) (Martínez Garcia & García Tejedor, LT4HALA 2020)
ACL