@inproceedings{vazquez-etal-2019-university,
title = "The {U}niversity of {H}elsinki Submission to the {WMT}19 Parallel Corpus Filtering Task",
author = {V{\'a}zquez, Ra{\'u}l and
Sulubacak, Umut and
Tiedemann, J{\"o}rg},
booktitle = "Proceedings of the Fourth Conference on Machine Translation (Volume 3: Shared Task Papers, Day 2)",
month = aug,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W19-5441",
doi = "10.18653/v1/W19-5441",
pages = "294--300",
abstract = "This paper describes the University of Helsinki Language Technology group{'}s participation in the WMT 2019 parallel corpus filtering task. Our scores were produced using a two-step strategy. First, we individually applied a series of filters to remove the {`}bad{'} quality sentences. Then, we produced scores for each sentence by weighting these features with a classification model. This methodology allowed us to build a simple and reliable system that is easily adaptable to other language pairs.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="vazquez-etal-2019-university">
<titleInfo>
<title>The University of Helsinki Submission to the WMT19 Parallel Corpus Filtering Task</title>
</titleInfo>
<name type="personal">
<namePart type="given">Raúl</namePart>
<namePart type="family">Vázquez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Umut</namePart>
<namePart type="family">Sulubacak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jörg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-aug</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Conference on Machine Translation (Volume 3: Shared Task Papers, Day 2)</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Florence, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes the University of Helsinki Language Technology group’s participation in the WMT 2019 parallel corpus filtering task. Our scores were produced using a two-step strategy. First, we individually applied a series of filters to remove the ‘bad’ quality sentences. Then, we produced scores for each sentence by weighting these features with a classification model. This methodology allowed us to build a simple and reliable system that is easily adaptable to other language pairs.</abstract>
<identifier type="citekey">vazquez-etal-2019-university</identifier>
<identifier type="doi">10.18653/v1/W19-5441</identifier>
<location>
<url>https://aclanthology.org/W19-5441</url>
</location>
<part>
<date>2019-aug</date>
<extent unit="page">
<start>294</start>
<end>300</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The University of Helsinki Submission to the WMT19 Parallel Corpus Filtering Task
%A Vázquez, Raúl
%A Sulubacak, Umut
%A Tiedemann, Jörg
%S Proceedings of the Fourth Conference on Machine Translation (Volume 3: Shared Task Papers, Day 2)
%D 2019
%8 aug
%I Association for Computational Linguistics
%C Florence, Italy
%F vazquez-etal-2019-university
%X This paper describes the University of Helsinki Language Technology group’s participation in the WMT 2019 parallel corpus filtering task. Our scores were produced using a two-step strategy. First, we individually applied a series of filters to remove the ‘bad’ quality sentences. Then, we produced scores for each sentence by weighting these features with a classification model. This methodology allowed us to build a simple and reliable system that is easily adaptable to other language pairs.
%R 10.18653/v1/W19-5441
%U https://aclanthology.org/W19-5441
%U https://doi.org/10.18653/v1/W19-5441
%P 294-300
Markdown (Informal)
[The University of Helsinki Submission to the WMT19 Parallel Corpus Filtering Task](https://aclanthology.org/W19-5441) (Vázquez et al., 2019)
ACL