% Kejriwal and Koehn, parallel corpus filtering shared-task paper in the
% Proceedings of the Fifth Conference on Machine Translation (2020).
% The url field uses the canonical ACL Anthology address, not a staging preview build.
@inproceedings{kejriwal-koehn-2020-exploratory,
  title     = {An exploratory approach to the Parallel Corpus Filtering shared task {WMT}20},
  author    = {Kejriwal, Ankur and
               Koehn, Philipp},
  editor    = {Barrault, Lo{\"i}c and
               Bojar, Ond{\v{r}}ej and
               Bougares, Fethi and
               Chatterjee, Rajen and
               Costa-juss{\`a}, Marta R. and
               Federmann, Christian and
               Fishel, Mark and
               Fraser, Alexander and
               Graham, Yvette and
               Guzman, Paco and
               Haddow, Barry and
               Huck, Matthias and
               Jimeno Yepes, Antonio and
               Koehn, Philipp and
               Martins, Andr{\'e} and
               Morishita, Makoto and
               Monz, Christof and
               Nagata, Masaaki and
               Nakazawa, Toshiaki and
               Negri, Matteo},
  booktitle = {Proceedings of the Fifth Conference on Machine Translation},
  month     = nov,
  year      = {2020},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2020.wmt-1.108/},
  pages     = {959--965},
  abstract  = {In this document we describe our submission to the parallel corpus filtering task using multilingual word embedding, language models and an ensemble of pre and post filtering rules. We use the norms of embedding and the perplexities of language models along with pre/post filtering rules to complement the LASER baseline scores and in the end get an improvement on the dev set in both language pairs.},
}
Markdown (Informal)
[An exploratory approach to the Parallel Corpus Filtering shared task WMT20](https://aclanthology.org/2020.wmt-1.108/) (Kejriwal & Koehn, WMT 2020)
ACL