Canonical ACL Anthology record for Falcão et al., LREC-COLING 2024.
Fixed: stable anthology URL (was a temporary preview-ingestion link),
garbled backtick apostrophe in the abstract, and whole-word brace
protection in the title.
@inproceedings{falcao-etal-2024-comet,
    title     = "{COMET} for Low-Resource Machine Translation Evaluation: A Case Study of {English}-{Maltese} and {Spanish}-{Basque}",
    author    = "Falc{\~a}o, J{\'u}lia and
                 Borg, Claudia and
                 Aranberri, Nora and
                 Abela, Kurt",
    editor    = "Calzolari, Nicoletta and
                 Kan, Min-Yen and
                 Hoste, Veronique and
                 Lenci, Alessandro and
                 Sakti, Sakriani and
                 Xue, Nianwen",
    booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
    month     = may,
    year      = "2024",
    address   = "Torino, Italia",
    publisher = "ELRA and ICCL",
    url       = "https://aclanthology.org/2024.lrec-main.315/",
    pages     = "3553--3565",
    abstract  = "Trainable metrics for machine translation evaluation have been scoring the highest correlations with human judgements in the latest meta-evaluations, outperforming traditional lexical overlap metrics such as BLEU, which is still widely used despite its well-known shortcomings. In this work we look at COMET, a prominent neural evaluation system proposed in 2020, to analyze the extent of its language support restrictions, and to investigate strategies to extend this support to new, under-resourced languages. Our case study focuses on English-Maltese and Spanish-Basque. We run a crowd-based evaluation campaign to collect direct assessments and use the annotated dataset to evaluate COMET-22, further fine-tune it, and to train COMET models from scratch for the two language pairs. Our analysis suggests that COMET's performance can be improved with fine-tuning, and that COMET can be highly susceptible to the distribution of scores in the training data, which especially impacts low-resource scenarios."
}
Markdown (Informal)
[COMET for Low-Resource Machine Translation Evaluation: A Case Study of English-Maltese and Spanish-Basque](https://aclanthology.org/2024.lrec-main.315/) (Falcão et al., LREC-COLING 2024)
ACL