@inproceedings{uhlig-etal-2025-cross,
  title     = {Cross-lingual Human-Preference Alignment for {Neural Machine Translation} with {Direct Quality Optimization}},
  author    = {Uhlig, Kaden and
               Wuebker, Joern and
               Reinauer, Raphael and
               DeNero, John},
  editor    = {Haddow, Barry and
               Kocmi, Tom and
               Koehn, Philipp and
               Monz, Christof},
  booktitle = {Proceedings of the Tenth Conference on Machine Translation},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.wmt-1.2/},
  pages     = {31--51},
  isbn      = {979-8-89176-341-8},
  abstract  = {Reinforcement Learning from Human Feedback (RLHF) and derivative techniques like Direct Preference Optimization (DPO) are task-alignment algorithms used to repurpose general, foundational models for specific tasks. We show that applying task-alignment to neural machine translation (NMT) addresses an existing task{--}data mismatch in NMT, leading to improvements across all languages of a multilingual model, even when task-alignment is only applied to a subset of those languages. We do so by introducing Direct Quality Optimization (DQO), a variant of DPO leveraging a pre-trained translation quality estimation model as a proxy for human preferences, and verify the improvements with both automatic metrics and through human evaluation.},
}
Markdown (Informal)
[Cross-lingual Human-Preference Alignment for Neural Machine Translation with Direct Quality Optimization](https://aclanthology.org/2025.wmt-1.2/) (Uhlig et al., WMT 2025)
ACL