@inproceedings{kvapilikova-bojar-2023-boosting,
  author    = {Kvapil{\'i}kov{\'a}, Ivana and
               Bojar, Ond{\v{r}}ej},
  title     = {Boosting Unsupervised Machine Translation with Pseudo-Parallel Data},
  editor    = {Utiyama, Masao and
               Wang, Rui},
  booktitle = {Proceedings of Machine Translation Summit XIX, Vol. 1: Research Track},
  month     = sep,
  year      = {2023},
  address   = {Macau SAR, China},
  publisher = {Asia-Pacific Association for Machine Translation},
  pages     = {135--147},
  url       = {https://aclanthology.org/2023.mtsummit-research.12/},
  abstract  = {Even with the latest developments in deep learning and large-scale language modeling, the task of machine translation (MT) of low-resource languages remains a challenge. Neural MT systems can be trained in an unsupervised way without any translation resources but the quality lags behind, especially in truly low-resource conditions. We propose a training strategy that relies on pseudo-parallel sentence pairs mined from monolingual corpora in addition to synthetic sentence pairs back-translated from monolingual corpora. We experiment with different training schedules and reach an improvement of up to 14.5 BLEU points (English to Ukrainian) over a baseline trained on back-translated data only.},
}
Markdown (Informal)
[Boosting Unsupervised Machine Translation with Pseudo-Parallel Data](https://aclanthology.org/2023.mtsummit-research.12/) (Kvapilíková & Bojar, MTSummit 2023)
ACL