% WMT 2019 shared-task paper, ACL Anthology ID W19-5433.
% The url field uses the canonical Anthology address; names are given in
% "Last, First" form with compound surnames kept together.
@inproceedings{axelrod-etal-2019-dual,
    title = {Dual Monolingual Cross-Entropy Delta Filtering of Noisy Parallel Data},
    author = {Axelrod, Amittai and
      Kumar, Anish and
      Sloto, Steve},
    editor = {Bojar, Ond{\v{r}}ej and
      Chatterjee, Rajen and
      Federmann, Christian and
      Fishel, Mark and
      Graham, Yvette and
      Haddow, Barry and
      Huck, Matthias and
      Jimeno Yepes, Antonio and
      Koehn, Philipp and
      Martins, Andr{\'e} and
      Monz, Christof and
      Negri, Matteo and
      N{\'e}v{\'e}ol, Aur{\'e}lie and
      Neves, Mariana and
      Post, Matt and
      Turchi, Marco and
      Verspoor, Karin},
    booktitle = {Proceedings of the Fourth Conference on Machine Translation (Volume 3: Shared Task Papers, Day 2)},
    month = aug,
    year = {2019},
    address = {Florence, Italy},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/W19-5433/},
    doi = {10.18653/v1/W19-5433},
    pages = {245--251},
    abstract = {We introduce a purely monolingual approach to filtering for parallel data from a noisy corpus in a low-resource scenario. Our work is inspired by Junczys-Dowmunt (2018), but we relax the requirements to allow for cases where no parallel data is available. Our primary contribution is a dual monolingual cross-entropy delta criterion modified from Cynical data selection (Axelrod, 2017), and is competitive (within 1.8 BLEU) with the best bilingual filtering method when used to train SMT systems. Our approach is featherweight, and runs end-to-end on a standard laptop in three hours.}
}
Markdown (Informal)
[Dual Monolingual Cross-Entropy Delta Filtering of Noisy Parallel Data](https://aclanthology.org/W19-5433/) (Axelrod et al., WMT 2019)
ACL