% Seed-CAT submission to the WMT24 Open Language Data Initiative (OLDI) shared task.
% The url field uses the canonical ACL Anthology address derived from the Anthology ID
% in the DOI (2024.wmt-1.50) instead of a temporary preview-build link.
@inproceedings{cols-2024-spanish,
  title     = {{Spanish} Corpus and Provenance with Computer-Aided Translation for the {WMT24} {OLDI} Shared Task},
  author    = {Cols, Jose},
  editor    = {Haddow, Barry and
               Kocmi, Tom and
               Koehn, Philipp and
               Monz, Christof},
  booktitle = {Proceedings of the Ninth Conference on Machine Translation},
  month     = nov,
  year      = {2024},
  address   = {Miami, Florida, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.wmt-1.50/},
  doi       = {10.18653/v1/2024.wmt-1.50},
  pages     = {624--635},
  abstract  = {This paper presents the Seed-CAT submission to the WMT24 Open Language Data Initiative shared task. We detail our data collection method, which involves a computer-aided translation tool developed explicitly for translating Seed corpora. We release a professionally translated Spanish corpus and a provenance dataset documenting the translation process. The quality of the data was validated on the FLORES+ benchmark with English-Spanish neural machine translation models, achieving an average chrF++ score of 34.9.},
}
Markdown (Informal)
[Spanish Corpus and Provenance with Computer-Aided Translation for the WMT24 OLDI Shared Task](https://aclanthology.org/2024.wmt-1.50/) (Cols, WMT 2024)
ACL