@inproceedings{khatri-bhattacharyya-2020-filtering,
title = "Filtering Back-Translated Data in Unsupervised Neural Machine Translation",
author = "Khatri, Jyotsana and
Bhattacharyya, Pushpak",
editor = "Scott, Donia and
Bel, Nuria and
Zong, Chengqing",
booktitle = "Proceedings of the 28th International Conference on Computational Linguistics",
month = dec,
year = "2020",
address = "Barcelona, Spain (Online)",
publisher = "International Committee on Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2020.coling-main.383/",
doi = "10.18653/v1/2020.coling-main.383",
pages = "4334--4339",
abstract = "Unsupervised neural machine translation (NMT) utilizes only monolingual data for training. The quality of back-translated data plays an important role in the performance of NMT systems. In back-translation, all generated pseudo parallel sentence pairs are not of the same quality. Taking inspiration from domain adaptation where in-domain sentences are given more weight in training, in this paper we propose an approach to filter back-translated data as part of the training process of unsupervised NMT. Our approach gives more weight to good pseudo parallel sentence pairs in the back-translation phase. We calculate the weight of each pseudo parallel sentence pair using sentence-wise round-trip BLEU score which is normalized batch-wise. We compare our approach with the current state of the art approaches for unsupervised NMT."
}
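
The abstract outlines the weighting mechanism: score each back-translated pair by its sentence-level round-trip BLEU, normalize the scores within the batch, and use them to weight the back-translation training loss. A minimal sketch of that step follows, assuming PyTorch and sacrebleu. The exact batch-wise normalization scheme is not specified in the abstract, so scaling by the batch maximum is an assumption here, and the function names (round_trip_bleu_weights, weighted_nll) are hypothetical rather than the authors' code.

from typing import List

import sacrebleu
import torch
import torch.nn.functional as F

def round_trip_bleu_weights(originals: List[str], round_trips: List[str]) -> torch.Tensor:
    """Sentence-wise round-trip BLEU per pair, normalized within the batch.

    originals:   monolingual source sentences
    round_trips: the same sentences after source -> target -> source translation
    """
    scores = torch.tensor([
        sacrebleu.sentence_bleu(rt, [orig]).score  # sentence BLEU in [0, 100]
        for orig, rt in zip(originals, round_trips)
    ])
    # Batch-wise normalization (assumed form): scale by the batch maximum.
    return scores / scores.max().clamp(min=1e-8)

def weighted_nll(logits: torch.Tensor, targets: torch.Tensor,
                 weights: torch.Tensor, pad_id: int = 0) -> torch.Tensor:
    """Per-sentence token NLL, weighted by the round-trip BLEU weights.

    logits:  (batch, seq_len, vocab) decoder outputs on back-translated pairs
    targets: (batch, seq_len) reference token ids, padded with pad_id
    weights: (batch,) per-pair weights from round_trip_bleu_weights
    """
    per_token = F.cross_entropy(logits.transpose(1, 2), targets,
                                ignore_index=pad_id, reduction="none")
    mask = (targets != pad_id).float()
    per_sentence = (per_token * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1.0)
    return (weights * per_sentence).mean()

In an unsupervised NMT loop, round_trips would come from translating a monolingual batch into the other language and back with the current model, so the weights are recomputed as the model improves.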