% University of Helsinki system paper for the WMT19 parallel corpus filtering shared task (ACL Anthology record W19-5441).
@inproceedings{vazquez-etal-2019-university,
    title = "The {University} of {Helsinki} Submission to the {WMT19} Parallel Corpus Filtering Task",
    author = {V{\'a}zquez, Ra{\'u}l  and
      Sulubacak, Umut  and
      Tiedemann, J{\"o}rg},
    editor = "Bojar, Ond{\v{r}}ej  and
      Chatterjee, Rajen  and
      Federmann, Christian  and
      Fishel, Mark  and
      Graham, Yvette  and
      Haddow, Barry  and
      Huck, Matthias  and
      Jimeno Yepes, Antonio  and
      Koehn, Philipp  and
      Martins, Andr{\'e}  and
      Monz, Christof  and
      Negri, Matteo  and
      N{\'e}v{\'e}ol, Aur{\'e}lie  and
      Neves, Mariana  and
      Post, Matt  and
      Turchi, Marco  and
      Verspoor, Karin",
    booktitle = "Proceedings of the Fourth Conference on Machine Translation (Volume 3: Shared Task Papers, Day 2)",
    month = aug,
    year = "2019",
    address = "Florence, Italy",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/W19-5441/",
    doi = "10.18653/v1/W19-5441",
    pages = "294--300",
    abstract = "This paper describes the University of Helsinki Language Technology group{'}s participation in the WMT 2019 parallel corpus filtering task. Our scores were produced using a two-step strategy. First, we individually applied a series of filters to remove the `bad' quality sentences. Then, we produced scores for each sentence by weighting these features with a classification model. This methodology allowed us to build a simple and reliable system that is easily adaptable to other language pairs."
}
Markdown (Informal)
[The University of Helsinki Submission to the WMT19 Parallel Corpus Filtering Task](https://aclanthology.org/W19-5441/) (Vázquez et al., WMT 2019)
ACL