@inproceedings{tan-etal-2024-uva,
title = "{U}v{A}-{MT}{'}s Participation in the {WMT}24 General Translation Shared Task",
author = "Tan, Shaomu and
Wu, Di and
Stap, David and
Aycock, Seth and
Monz, Christof",
editor = "Haddow, Barry and
Kocmi, Tom and
Koehn, Philipp and
Monz, Christof",
booktitle = "Proceedings of the Ninth Conference on Machine Translation",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2024.wmt-1.11/",
doi = "10.18653/v1/2024.wmt-1.11",
pages = "176--184",
abstract = "Fine-tuning Large Language Models (FT-LLMs) with parallel data has emerged as a promising paradigm in recent machine translation research. In this paper, we explore the effectiveness of FT-LLMs and compare them to traditional encoder-decoder Neural Machine Translation (NMT) systems under the WMT24 general MT shared task for English to Chinese direction. We implement several techniques, including Quality Estimation (QE) data filtering, supervised fine-tuning, and post-editing that integrate NMT systems with LLMs. We demonstrate that fine-tuning LLaMA2 on a high-quality but relatively small bitext dataset (100K) yields COMET results comparable to much smaller encoder-decoder NMT systems trained on over 22 million bitexts. However, this approach largely underperforms on surface-level metrics like BLEU and ChrF. We further control the data quality using the COMET-based quality estimation method. Our experiments show that 1) filtering low COMET scores largely improves encoder-decoder systems, but 2) no clear gains are observed for LLMs when further refining the fine-tuning set. Finally, we show that combining NMT systems with LLMs via post-editing generally yields the best performance for the WMT24 official test set."
}