@inproceedings{labaka-etal-2016-domain,
title = "Domain Adaptation in {MT} Using Titles in {W}ikipedia as a Parallel Corpus: Resources and Evaluation",
author = "Labaka, Gorka and
Alegria, I{\~n}aki and
Sarasola, Kepa",
booktitle = "Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)",
month = may,
year = "2016",
address = "Portoro{\v{z}}, Slovenia",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/L16-1351",
pages = "2209--2213",
abstract = "This paper presents how a state-of-the-art SMT system is enriched by using an extra in-domain parallel corpus extracted from Wikipedia. We collect corpora from parallel titles and from parallel fragments in comparable articles from Wikipedia. We carried out an evaluation with a double objective: evaluating the quality of the extracted data and evaluating the improvement due to the domain-adaptation. We think this can be very useful for languages with a limited amount of parallel corpora, where in-domain data is crucial to improve the performance of MT systems. The experiments on the Spanish-English language pair improve a baseline trained with the Europarl corpus in more than 2 points of BLEU when translating in the Computer Science domain.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="labaka-etal-2016-domain">
<titleInfo>
<title>Domain Adaptation in MT Using Titles in Wikipedia as a Parallel Corpus: Resources and Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Gorka</namePart>
<namePart type="family">Labaka</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iñaki</namePart>
<namePart type="family">Alegria</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kepa</namePart>
<namePart type="family">Sarasola</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Portorož, Slovenia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents how a state-of-the-art SMT system is enriched by using an extra in-domain parallel corpus extracted from Wikipedia. We collect corpora from parallel titles and from parallel fragments in comparable articles from Wikipedia. We carried out an evaluation with a double objective: evaluating the quality of the extracted data and evaluating the improvement due to the domain-adaptation. We think this can be very useful for languages with a limited amount of parallel corpora, where in-domain data is crucial to improve the performance of MT systems. The experiments on the Spanish-English language pair improve a baseline trained with the Europarl corpus in more than 2 points of BLEU when translating in the Computer Science domain.</abstract>
<identifier type="citekey">labaka-etal-2016-domain</identifier>
<location>
<url>https://aclanthology.org/L16-1351</url>
</location>
<part>
<date>2016-05</date>
<extent unit="page">
<start>2209</start>
<end>2213</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Domain Adaptation in MT Using Titles in Wikipedia as a Parallel Corpus: Resources and Evaluation
%A Labaka, Gorka
%A Alegria, Iñaki
%A Sarasola, Kepa
%S Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)
%D 2016
%8 may
%I European Language Resources Association (ELRA)
%C Portorož, Slovenia
%F labaka-etal-2016-domain
%X This paper presents how a state-of-the-art SMT system is enriched by using an extra in-domain parallel corpus extracted from Wikipedia. We collect corpora from parallel titles and from parallel fragments in comparable articles from Wikipedia. We carried out an evaluation with a double objective: evaluating the quality of the extracted data and evaluating the improvement due to the domain-adaptation. We think this can be very useful for languages with a limited amount of parallel corpora, where in-domain data is crucial to improve the performance of MT systems. The experiments on the Spanish-English language pair improve a baseline trained with the Europarl corpus in more than 2 points of BLEU when translating in the Computer Science domain.
%U https://aclanthology.org/L16-1351
%P 2209-2213
Markdown (Informal)
[Domain Adaptation in MT Using Titles in Wikipedia as a Parallel Corpus: Resources and Evaluation](https://aclanthology.org/L16-1351) (Labaka et al., LREC 2016)
ACL