% Workshop paper describing the AST submission to the CoCo4MT 2023 shared task.
% The acronym CoCo4MT is braced as a whole word so sentence-casing styles keep its capitals.
% The url field uses the canonical ACL Anthology address instead of a temporary preview-branch mirror.
@inproceedings{steingrimsson-2023-ast,
  author    = {Steingr{\'i}msson, Stein{\th}{\'o}r},
  title     = {The {AST} Submission for the {CoCo4MT} 2023 Shared Task on Corpus Construction for Low-Resource Machine Translation},
  booktitle = {Proceedings of the Second Workshop on Corpus Generation and Corpus Augmentation for Machine Translation},
  month     = sep,
  year      = {2023},
  address   = {Macau SAR, China},
  publisher = {Asia-Pacific Association for Machine Translation},
  pages     = {33--38},
  url       = {https://aclanthology.org/2023.mtsummit-coco4mt.5/},
  abstract  = {We describe the AST submission for the CoCo4MT 2023 shared task. The aim of the task is to identify the best candidates for translation in a source data set with the aim to use the translated parallel data for fine-tuning the mBART-50 model. We experiment with three methods: scoring sentences based on n-gram coverage, using LaBSE to estimate semantic similarity and identify misalignments and mistranslations by comparing machine translated source sentences to corresponding manually translated segments in high-resource languages. We find that we obtain the best results by combining these three methods, using LaBSE and machine translation for filtering, and one of our n-gram scoring approaches for ordering sentences.},
}
Markdown (Informal)
[The AST Submission for the CoCo4MT 2023 Shared Task on Corpus Construction for Low-Resource Machine Translation](https://aclanthology.org/2023.mtsummit-coco4mt.5/) (Steingrímsson, MTSummit 2023)
ACL