% System description paper for the WMT 2018 Parallel Corpus Filtering shared task
% (ACL Anthology ID W18-6484). The address field is written as "City, Country".
@inproceedings{papavassiliou-etal-2018-ilsp,
  author    = {Papavassiliou, Vassilis and
               Sofianopoulos, Sokratis and
               Prokopidis, Prokopis and
               Piperidis, Stelios},
  title     = {The {ILSP}/{ARC} submission to the {WMT} 2018 Parallel Corpus Filtering Shared Task},
  booktitle = {Proceedings of the Third Conference on Machine Translation: Shared Task Papers},
  month     = oct,
  year      = {2018},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  pages     = {928--933},
  doi       = {10.18653/v1/W18-6484},
  url       = {https://aclanthology.org/W18-6484},
  abstract  = {This paper describes the submission of the Institute for Language and Speech Processing/Athena Research and Innovation Center (ILSP/ARC) for the WMT 2018 Parallel Corpus Filtering shared task. We explore several properties of sentences and sentence pairs that our system explored in the context of the task with the purpose of clustering sentence pairs according to their appropriateness in training MT systems. We also discuss alternative methods for ranking the sentence pairs of the most appropriate clusters with the aim of generating the two datasets (of 10 and 100 million words as required in the task) that were evaluated. By summarizing the results of several experiments that were carried out by the organizers during the evaluation phase, our submission achieved an average BLEU score of 26.41, even though it does not make use of any language-specific resources like bilingual lexica, monolingual corpora, or MT output, while the average score of the best participant system was 27.91.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey papavassiliou-etal-2018-ilsp: a conference paper
     (pages 928 to 933) whose host item is the shared-task proceedings volume.
     The place of publication is written as "City, Country". -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="papavassiliou-etal-2018-ilsp">
    <titleInfo>
      <title>The ILSP/ARC submission to the WMT 2018 Parallel Corpus Filtering Shared Task</title>
    </titleInfo>
    <!-- Authors, in publication order -->
    <name type="personal">
      <namePart type="given">Vassilis</namePart>
      <namePart type="family">Papavassiliou</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sokratis</namePart>
      <namePart type="family">Sofianopoulos</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Prokopis</namePart>
      <namePart type="family">Prokopidis</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Stelios</namePart>
      <namePart type="family">Piperidis</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2018-oct</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume this paper appears in -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Third Conference on Machine Translation: Shared Task Papers</title>
      </titleInfo>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Brussels, Belgium</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper describes the submission of the Institute for Language and Speech Processing/Athena Research and Innovation Center (ILSP/ARC) for the WMT 2018 Parallel Corpus Filtering shared task. We explore several properties of sentences and sentence pairs that our system explored in the context of the task with the purpose of clustering sentence pairs according to their appropriateness in training MT systems. We also discuss alternative methods for ranking the sentence pairs of the most appropriate clusters with the aim of generating the two datasets (of 10 and 100 million words as required in the task) that were evaluated. By summarizing the results of several experiments that were carried out by the organizers during the evaluation phase, our submission achieved an average BLEU score of 26.41, even though it does not make use of any language-specific resources like bilingual lexica, monolingual corpora, or MT output, while the average score of the best participant system was 27.91.</abstract>
    <identifier type="citekey">papavassiliou-etal-2018-ilsp</identifier>
    <identifier type="doi">10.18653/v1/W18-6484</identifier>
    <location>
      <url>https://aclanthology.org/W18-6484</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2018-oct</date>
      <extent unit="page">
        <start>928</start>
        <end>933</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T The ILSP/ARC submission to the WMT 2018 Parallel Corpus Filtering Shared Task
%A Papavassiliou, Vassilis
%A Sofianopoulos, Sokratis
%A Prokopidis, Prokopis
%A Piperidis, Stelios
%S Proceedings of the Third Conference on Machine Translation: Shared Task Papers
%D 2018
%8 oct
%I Association for Computational Linguistics
%C Brussels, Belgium
%F papavassiliou-etal-2018-ilsp
%X This paper describes the submission of the Institute for Language and Speech Processing/Athena Research and Innovation Center (ILSP/ARC) for the WMT 2018 Parallel Corpus Filtering shared task. We explore several properties of sentences and sentence pairs that our system explored in the context of the task with the purpose of clustering sentence pairs according to their appropriateness in training MT systems. We also discuss alternative methods for ranking the sentence pairs of the most appropriate clusters with the aim of generating the two datasets (of 10 and 100 million words as required in the task) that were evaluated. By summarizing the results of several experiments that were carried out by the organizers during the evaluation phase, our submission achieved an average BLEU score of 26.41, even though it does not make use of any language-specific resources like bilingual lexica, monolingual corpora, or MT output, while the average score of the best participant system was 27.91.
%R 10.18653/v1/W18-6484
%U https://aclanthology.org/W18-6484
%U https://doi.org/10.18653/v1/W18-6484
%P 928-933
Markdown (Informal)
[The ILSP/ARC submission to the WMT 2018 Parallel Corpus Filtering Shared Task](https://aclanthology.org/W18-6484) (Papavassiliou et al., 2018)
ACL