@inproceedings{kejriwal-koehn-2020-exploratory,
title = "An exploratory approach to the Parallel Corpus Filtering shared task {WMT}20",
author = "Kejriwal, Ankur and
Koehn, Philipp",
booktitle = "Proceedings of the Fifth Conference on Machine Translation",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.wmt-1.108",
pages = "959--965",
abstract = "In this document we describe our submission to the parallel corpus filtering task using multilingual word embedding, language models and an ensemble of pre and post filtering rules. We use the norms of embedding and the perplexities of language models along with pre/post filtering rules to complement the LASER baseline scores and in the end get an improvement on the dev set in both language pairs.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kejriwal-koehn-2020-exploratory">
<titleInfo>
<title>An exploratory approach to the Parallel Corpus Filtering shared task WMT20</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ankur</namePart>
<namePart type="family">Kejriwal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Koehn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued encoding="w3cdtf">2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Conference on Machine Translation</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this document we describe our submission to the parallel corpus filtering task using multilingual word embedding, language models and an ensemble of pre and post filtering rules. We use the norms of embedding and the perplexities of language models along with pre/post filtering rules to complement the LASER baseline scores and in the end get an improvement on the dev set in both language pairs.</abstract>
<identifier type="citekey">kejriwal-koehn-2020-exploratory</identifier>
<location>
<url>https://aclanthology.org/2020.wmt-1.108</url>
</location>
<part>
<date encoding="w3cdtf">2020-11</date>
<extent unit="page">
<start>959</start>
<end>965</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T An exploratory approach to the Parallel Corpus Filtering shared task WMT20
%A Kejriwal, Ankur
%A Koehn, Philipp
%S Proceedings of the Fifth Conference on Machine Translation
%D 2020
%8 nov
%I Association for Computational Linguistics
%C Online
%F kejriwal-koehn-2020-exploratory
%X In this document we describe our submission to the parallel corpus filtering task using multilingual word embedding, language models and an ensemble of pre and post filtering rules. We use the norms of embedding and the perplexities of language models along with pre/post filtering rules to complement the LASER baseline scores and in the end get an improvement on the dev set in both language pairs.
%U https://aclanthology.org/2020.wmt-1.108
%P 959-965
Markdown (Informal)
[An exploratory approach to the Parallel Corpus Filtering shared task WMT20](https://aclanthology.org/2020.wmt-1.108) (Kejriwal & Koehn, WMT 2020)
ACL