@comment{ACL Anthology record W18-6479: system description paper in the WMT 2018 shared task proceedings. NOTE(review): address appears to follow the Anthology convention of recording the conference venue rather than the publisher city; confirm against the target style.}
@inproceedings{khayrallah-etal-2018-jhu,
    title = "The {JHU} Parallel Corpus Filtering Systems for {WMT} 2018",
    author = "Khayrallah, Huda and
      Xu, Hainan and
      Koehn, Philipp",
    editor = "Bojar, Ond{\v{r}}ej and
      Chatterjee, Rajen and
      Federmann, Christian and
      Fishel, Mark and
      Graham, Yvette and
      Haddow, Barry and
      Huck, Matthias and
      Jimeno Yepes, Antonio and
      Koehn, Philipp and
      Monz, Christof and
      Negri, Matteo and
      N{\'e}v{\'e}ol, Aur{\'e}lie and
      Neves, Mariana and
      Post, Matt and
      Specia, Lucia and
      Turchi, Marco and
      Verspoor, Karin",
    booktitle = "Proceedings of the Third Conference on Machine Translation: Shared Task Papers",
    month = oct,
    year = "2018",
    address = "Brussels, Belgium",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/W18-6479/",
    doi = "10.18653/v1/W18-6479",
    pages = "896--899",
    abstract = "This work describes our submission to the WMT18 Parallel Corpus Filtering shared task. We use a slightly modified version of the Zipporah Corpus Filtering toolkit (Xu and Koehn, 2017), which computes an adequacy score and a fluency score on a sentence pair, and use a weighted sum of the scores as the selection criteria. This work differs from Zipporah in that we experiment with using the noisy corpus to be filtered to compute the combination weights, and thus avoids generating synthetic data as in standard Zipporah."
}
Markdown (Informal)
[The JHU Parallel Corpus Filtering Systems for WMT 2018](https://aclanthology.org/W18-6479/) (Khayrallah et al., WMT 2018)
ACL