% Findings of ACL 2024 paper on Finnish-to-Northern-Sami low-resource MT.
% Proper nouns in the title are brace-protected as whole words so that
% sentence-casing bibliography styles keep their capitals.
@inproceedings{saleva-lignos-2024-language,
  title     = {Language Model Priors and Data Augmentation Strategies for Low-resource Machine Translation: A Case Study Using {Finnish} to {Northern} {S{\'a}mi}},
  author    = {S{\"a}lev{\"a}, Jonne and
               Lignos, Constantine},
  editor    = {Ku, Lun-Wei and
               Martins, Andre and
               Srikumar, Vivek},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2024},
  month     = aug,
  year      = {2024},
  address   = {Bangkok, Thailand},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.findings-acl.768/},
  doi       = {10.18653/v1/2024.findings-acl.768},
  pages     = {12949--12956},
  abstract  = {We investigate ways of using monolingual data in both the source and target languages for improving low-resource machine translation. As a case study, we experiment with translation from Finnish to Northern S{\'a}mi. Our experiments show that while conventional backtranslation remains a strong contender, using synthetic target-side data when training backtranslation models can be helpful as well. We also show that monolingual data can be used to train a language model which can act as a regularizer without any augmentation of parallel data.},
}
Markdown (Informal)
[Language Model Priors and Data Augmentation Strategies for Low-resource Machine Translation: A Case Study Using Finnish to Northern Sámi](https://aclanthology.org/2024.findings-acl.768/) (Sälevä & Lignos, Findings 2024)
ACL