@comment{Fadaee, Bisazza and Monz (2017), short paper in the ACL 2017 proceedings (Volume 2). The url field holds the canonical ACL Anthology address rather than a temporary preview-branch link; the doi field is the preferred persistent identifier.}
@inproceedings{fadaee-etal-2017-data,
  title     = {Data Augmentation for Low-Resource Neural Machine Translation},
  author    = {Fadaee, Marzieh and
               Bisazza, Arianna and
               Monz, Christof},
  editor    = {Barzilay, Regina and
               Kan, Min-Yen},
  booktitle = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  month     = jul,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/P17-2090/},
  doi       = {10.18653/v1/P17-2090},
  pages     = {567--573},
  abstract  = {The quality of a Neural Machine Translation system depends substantially on the availability of sizable parallel corpora. For low-resource language pairs this is not the case, resulting in poor translation quality. Inspired by work in computer vision, we propose a novel data augmentation approach that targets low-frequency words by generating new sentence pairs containing rare words in new, synthetically created contexts. Experimental results on simulated low-resource settings show that our method improves translation quality by up to 2.9 BLEU points over the baseline and up to 3.2 BLEU over back-translation.},
}
Markdown (Informal)
[Data Augmentation for Low-Resource Neural Machine Translation](https://aclanthology.org/P17-2090/) (Fadaee et al., ACL 2017)
ACL
- Marzieh Fadaee, Arianna Bisazza, and Christof Monz. 2017. Data Augmentation for Low-Resource Neural Machine Translation. In Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers), pages 567–573, Vancouver, Canada. Association for Computational Linguistics.