% Conference paper: English-to-Bhojpuri MT system described in the abstract as
% a WMT25 General MT Shared Task submission.
% NOTE(review): the url field points at a "preview ... ingest-emnlp" host, which
% looks like a staging build -- confirm the canonical Anthology URL (or DOI)
% before relying on this link.
@inproceedings{singh-etal-2025-instruction,
  title     = {Instruction-Tuned {English} to {Bhojpuri} Neural Machine Translation Using Contrastive Preference Optimization},
  author    = {Singh, Kshetrimayum Boynao and
               Kumar, Deepak and
               Ekbal, Asif},
  editor    = {Haddow, Barry and
               Kocmi, Tom and
               Koehn, Philipp and
               Monz, Christof},
  booktitle = {Proceedings of the Tenth Conference on Machine Translation},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-emnlp/2025.wmt-1.38/},
  pages     = {638--643},
  isbn      = {979-8-89176-341-8},
  abstract  = {This paper presents an English to Bhojpuri machine translation (MT) system developed for the WMT25 General MT Shared Task. Given the low-resource nature of Bhojpuri, we adopt a two-stage training pipeline: unsupervised pretraining followed by supervised fine-tuning. During pretraining, we use a 300,000-sentence corpus comprising 70{\%} Bhojpuri monolingual data and 30{\%} English data to establish language grounding. The fine-tuning stage utilizes 29,749 bilingual English to Bhojpuri sentence pairs (including training, validation, and test sets). To adapt the system to instruction-following scenarios, we apply a novel optimization strategy: Contrastive Preference Optimization (CPO). This technique enables the model to capture fine-grained translation preferences and maintain semantic fidelity in instruction-tuned settings. We evaluate our system across multiple metrics, demonstrating moderate performance in low-resource MT tasks, particularly in diverse domains such as literary, news, social, and speech.},
}
Markdown (Informal)
[Instruction-Tuned English to Bhojpuri Neural Machine Translation Using Contrastive Preference Optimization](https://preview.aclanthology.org/ingest-emnlp/2025.wmt-1.38/) (Singh et al., WMT 2025)
ACL