% Shared-task paper in the WMT 2018 proceedings (ACL Anthology ID W18-6474).
% NOTE(review): address presumably records the conference venue, as in other
% ACL Anthology exports, rather than the publisher's city -- confirm before reuse.
@inproceedings{barbu-barbu-mititelu-2018-hybrid,
    title     = {A hybrid pipeline of rules and machine learning to filter web-crawled parallel corpora},
    author    = {Barbu, Eduard and
                 Barbu Mititelu, Verginica},
    editor    = {Bojar, Ond{\v{r}}ej and
                 Chatterjee, Rajen and
                 Federmann, Christian and
                 Fishel, Mark and
                 Graham, Yvette and
                 Haddow, Barry and
                 Huck, Matthias and
                 Jimeno Yepes, Antonio and
                 Koehn, Philipp and
                 Monz, Christof and
                 Negri, Matteo and
                 N{\'e}v{\'e}ol, Aur{\'e}lie and
                 Neves, Mariana and
                 Post, Matt and
                 Specia, Lucia and
                 Turchi, Marco and
                 Verspoor, Karin},
    booktitle = {Proceedings of the Third Conference on Machine Translation: Shared Task Papers},
    month     = oct,
    year      = {2018},
    address   = {Brussels, Belgium},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/W18-6474/},
    doi       = {10.18653/v1/W18-6474},
    pages     = {867--871},
    abstract  = {A hybrid pipeline comprising rules and machine learning is used to filter a noisy web English-German parallel corpus for the Parallel Corpus Filtering task. The core of the pipeline is a module based on the logistic regression algorithm that returns the probability that a translation unit is accepted. The training set for the logistic regression is created by automatic annotation. The quality of the automatic annotation is estimated by manually labeling the training set.},
}
Markdown (Informal)
[A hybrid pipeline of rules and machine learning to filter web-crawled parallel corpora](https://aclanthology.org/W18-6474/) (Barbu & Barbu Mititelu, WMT 2018)
ACL