% Conference paper in the EACL 2026 proceedings, Volume 4 (Student Research Workshop).
% Case-sensitive words in title/booktitle are protected as whole words so that
% sentence-casing bibliography styles keep their capitals.
% NOTE(review): the url field points at a preview.aclanthology.org "ingest-eacl" path;
% confirm whether a permanent address should replace it before publishing.
@inproceedings{luu-etal-2026-machine,
  title     = {Machine Translation for Low-Resource Languages through Monolingual Data and {LLM}: A Case Study of {English}-to-{Basque}},
  author    = {Luu, Nam and
               Soroa, Aitor and
               Rigau, German and
               Bojar, Ond{\v{r}}ej},
  editor    = {Baez Santamaria, Selene and
               Somayajula, Sai Ashish and
               Yamaguchi, Atsuki},
  booktitle = {Proceedings of the 19th Conference of the {European} Chapter of the {Association} for {Computational} {Linguistics} (Volume 4: Student Research Workshop)},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-eacl/2026.eacl-srw.6/},
  pages     = {60--91},
  isbn      = {979-8-89176-383-8},
  abstract  = {Developing a machine translation (MT) system requires a considerable amount of high-quality parallel data, which is often limited for low-resource languages. This paper explores the use of synthetic data for training an LLM-based MT system in the English-to-Basque direction. Using Basque monolingual corpora as a starting point, we apply back-translation to generate parallel corpora, taking advantage of the fact that current LLMs do not translate well from English to Basque, but they yield an acceptable performance in the reverse direction. We conduct experiments in a multi-stage approach, from a simple Supervised Fine-tuning (SFT) step, to preference learning with the Direct Preference Optimization (DPO) technique. We then evaluate the approach with both automatic metrics and manual assessment. Experimental results suggest that for this task, SFT brings a clear improvement in translation quality, while DPO only yields marginal enhancement.},
}
Markdown (Informal)
[Machine Translation for Low-Resource Languages through Monolingual Data and LLM: A Case Study of English-to-Basque](https://preview.aclanthology.org/ingest-eacl/2026.eacl-srw.6/) (Luu et al., EACL 2026)
ACL