% ACL 2025 long paper, Anthology ID 2025.acl-long.1165.
% The url and doi fields use the permanent Anthology identifiers rather than
% the pre-publication ingestion preview link.
@inproceedings{sun-etal-2025-enhancing-machine,
  author    = {Sun, Haoxiang and
               Gao, Ruize and
               Zhang, Pei and
               Yang, Baosong and
               Wang, Rui},
  title     = {Enhancing Machine Translation with Self-Supervised Preference Data},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  pages     = {23916--23934},
  isbn      = {979-8-89176-251-0},
  doi       = {10.18653/v1/2025.acl-long.1165},
  url       = {https://aclanthology.org/2025.acl-long.1165/},
  abstract  = {Model alignment methods like Direct Preference Optimization and Contrastive Preference Optimization have enhanced machine translation performance by leveraging preference data to enable models to reject suboptimal outputs. During preference data construction, previous approaches primarily rely on humans, strong models like GPT4 or model self-sampling. In this study, we first explain the shortcomings of this practice. Then, we propose Self-Supervised Preference Optimization (SSPO), a novel framework which efficiently constructs translation preference data for iterative DPO training. Applying SSPO to 14B parameters large language models (LLMs) achieves comparable or better performance than GPT-4o on FLORES and multi-domain test datasets. We release an augmented MQM dataset in https://github.com/sunny-sjtu/MQM-aug.},
}
Markdown (Informal)
[Enhancing Machine Translation with Self-Supervised Preference Data](https://aclanthology.org/2025.acl-long.1165/) (Sun et al., ACL 2025)
ACL
- Haoxiang Sun, Ruize Gao, Pei Zhang, Baosong Yang, and Rui Wang. 2025. Enhancing Machine Translation with Self-Supervised Preference Data. In Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 23916–23934, Vienna, Austria. Association for Computational Linguistics.