% System description paper for the WMT 2024 shared task (ACL Anthology ID 2024.wmt-1.82).
% The url field uses the canonical Anthology address rather than a staging preview build.
@inproceedings{mutal-ormaechea-2024-tim,
    title     = "{TIM}-{UNIGE} Translation into Low-Resource Languages of {Spain} for {WMT24}",
    author    = "Mutal, Jonathan and
                 Ormaechea, Luc{\'i}a",
    editor    = "Haddow, Barry and
                 Kocmi, Tom and
                 Koehn, Philipp and
                 Monz, Christof",
    booktitle = "Proceedings of the Ninth Conference on Machine Translation",
    month     = nov,
    year      = "2024",
    address   = "Miami, Florida, USA",
    publisher = "Association for Computational Linguistics",
    url       = "https://aclanthology.org/2024.wmt-1.82/",
    doi       = "10.18653/v1/2024.wmt-1.82",
    pages     = "862--870",
    abstract  = "We present the results of our constrained submission to the WMT 2024 shared task, which focuses on translating from Spanish into two low-resource languages of Spain: Aranese (spa-arn) and Aragonese (spa-arg). Our system integrates real and synthetic data generated by large language models (e.g., BLOOMZ) and rule-based Apertium translation systems. Built upon the pre-trained NLLB system, our translation model utilizes a multistage approach, progressively refining the initial model through the sequential use of different datasets, starting with large-scale synthetic or crawled data and advancing to smaller, high-quality parallel corpora. This approach resulted in BLEU scores of 30.1 for Spanish to Aranese and 61.9 for Spanish to Aragonese."
}
Markdown (Informal)
[TIM-UNIGE Translation into Low-Resource Languages of Spain for WMT24](https://aclanthology.org/2024.wmt-1.82/) (Mutal & Ormaechea, WMT 2024)
ACL