@inproceedings{paetzold-2018-utfpr-wmt,
title = "{UTFPR} at {WMT} 2018: Minimalistic Supervised Corpora Filtering for Machine Translation",
author = "Paetzold, Gustavo",
booktitle = "Proceedings of the Third Conference on Machine Translation: Shared Task Papers",
month = oct,
year = "2018",
address = "Brussels, Belgium",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W18-6483",
doi = "10.18653/v1/W18-6483",
pages = "923--927",
abstract = "We present the UTFPR systems at the WMT 2018 parallel corpus filtering task. Our supervised approach discerns between good and bad translations by training classic binary classification models over an artificially produced binary classification dataset derived from a high-quality translation set, and a minimalistic set of 6 semantic distance features that rely only on easy-to-gather resources. We rank translations by their probability for the {``}good{''} label. Our results show that logistic regression pairs best with our approach, yielding more consistent results throughout the different settings evaluated.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="paetzold-2018-utfpr-wmt">
<titleInfo>
<title>UTFPR at WMT 2018: Minimalistic Supervised Corpora Filtering for Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Gustavo</namePart>
<namePart type="family">Paetzold</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Conference on Machine Translation: Shared Task Papers</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Brussels, Belgium</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present the UTFPR systems at the WMT 2018 parallel corpus filtering task. Our supervised approach discerns between good and bad translations by training classic binary classification models over an artificially produced binary classification dataset derived from a high-quality translation set, and a minimalistic set of 6 semantic distance features that rely only on easy-to-gather resources. We rank translations by their probability for the “good” label. Our results show that logistic regression pairs best with our approach, yielding more consistent results throughout the different settings evaluated.</abstract>
<identifier type="citekey">paetzold-2018-utfpr-wmt</identifier>
<identifier type="doi">10.18653/v1/W18-6483</identifier>
<location>
<url>https://aclanthology.org/W18-6483</url>
</location>
<part>
<date>2018-10</date>
<extent unit="page">
<start>923</start>
<end>927</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T UTFPR at WMT 2018: Minimalistic Supervised Corpora Filtering for Machine Translation
%A Paetzold, Gustavo
%S Proceedings of the Third Conference on Machine Translation: Shared Task Papers
%D 2018
%8 oct
%I Association for Computational Linguistics
%C Brussels, Belgium
%F paetzold-2018-utfpr-wmt
%X We present the UTFPR systems at the WMT 2018 parallel corpus filtering task. Our supervised approach discerns between good and bad translations by training classic binary classification models over an artificially produced binary classification dataset derived from a high-quality translation set, and a minimalistic set of 6 semantic distance features that rely only on easy-to-gather resources. We rank translations by their probability for the “good” label. Our results show that logistic regression pairs best with our approach, yielding more consistent results throughout the different settings evaluated.
%R 10.18653/v1/W18-6483
%U https://aclanthology.org/W18-6483
%U https://doi.org/10.18653/v1/W18-6483
%P 923-927
Markdown (Informal)
[UTFPR at WMT 2018: Minimalistic Supervised Corpora Filtering for Machine Translation](https://aclanthology.org/W18-6483) (Paetzold, 2018)
ACL