% Pinnis (2018): paper describing parallel corpus filtering methods and Tilde's
% submissions to the WMT 2018 shared task on parallel corpus filtering.
% NOTE(review): the url field points at a preview branch of the ACL Anthology
% ("jlcl-multiple-ingestion"); presumably the DOI is the stable reference --
% confirm the canonical URL before relying on it.
@inproceedings{pinnis-2018-tildes,
    title = "Tilde's Parallel Corpus Filtering Methods for {WMT} 2018",
    author = "Pinnis, M{\={a}}rcis",
    editor = "Bojar, Ond{\v{r}}ej and
      Chatterjee, Rajen and
      Federmann, Christian and
      Fishel, Mark and
      Graham, Yvette and
      Haddow, Barry and
      Huck, Matthias and
      Jimeno Yepes, Antonio and
      Koehn, Philipp and
      Monz, Christof and
      Negri, Matteo and
      N{\'e}v{\'e}ol, Aur{\'e}lie and
      Neves, Mariana and
      Post, Matt and
      Specia, Lucia and
      Turchi, Marco and
      Verspoor, Karin",
    booktitle = "Proceedings of the Third Conference on Machine Translation: Shared Task Papers",
    month = oct,
    year = "2018",
    address = "Brussels, Belgium",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/W18-6486/",
    doi = "10.18653/v1/W18-6486",
    pages = "939--945",
    abstract = "The paper describes parallel corpus filtering methods that allow reducing noise of noisy {\textquotedblleft}parallel{\textquotedblright} corpora from a level where the corpora are not usable for neural machine translation training (i.e., the resulting systems fail to achieve reasonable translation quality; well below 10 BLEU points) up to a level where the trained systems show decent (over 20 BLEU points on a 10 million word dataset and up to 30 BLEU points on a 100 million word dataset). The paper also documents Tilde's submissions to the WMT 2018 shared task on parallel corpus filtering."
}
Markdown (Informal)
[Tilde’s Parallel Corpus Filtering Methods for WMT 2018](https://preview.aclanthology.org/jlcl-multiple-ingestion/W18-6486/) (Pinnis, WMT 2018)
ACL