@inproceedings{sanchez-cartagena-2018-prompsit-,
title = "Prompsit{'}s Submission to the {IWSLT} 2018 Low Resource Machine Translation Task",
author = "S{\'a}nchez-Cartagena, V{\'\i}ctor M.",
booktitle = "Proceedings of the 15th International Conference on Spoken Language Translation",
month = oct # " 29-30",
year = "2018",
address = "Brussels",
publisher = "International Conference on Spoken Language Translation",
url = "https://aclanthology.org/2018.iwslt-1.14",
pages = "95--103",
abstract = "This paper presents Prompsit Language Engineering{'}s submission to the IWSLT 2018 Low Resource Machine Translation task. Our submission is based on cross-lingual learning: a multilingual neural machine translation system was created with the sole purpose of improving translation quality on the Basque-to-English language pair. The multilingual system was trained on a combination of in-domain data, pseudo in-domain data obtained via cross-entropy data selection and backtranslated data. We morphologically segmented Basque text with a novel approach that only requires a dictionary such as those used by spell checkers and proved that this segmentation approach outperforms the widespread byte pair encoding strategy for this task.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sanchez-cartagena-2018-prompsit-">
<titleInfo>
<title>Prompsit’s Submission to the IWSLT 2018 Low Resource Machine Translation Task</title>
</titleInfo>
<name type="personal">
<namePart type="given">Víctor</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Sánchez-Cartagena</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued encoding="w3cdtf" point="start">2018-10-29</dateIssued>
<dateIssued encoding="w3cdtf" point="end">2018-10-30</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th International Conference on Spoken Language Translation</title>
</titleInfo>
<originInfo>
<publisher>International Conference on Spoken Language Translation</publisher>
<place>
<placeTerm type="text">Brussels</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents Prompsit Language Engineering’s submission to the IWSLT 2018 Low Resource Machine Translation task. Our submission is based on cross-lingual learning: a multilingual neural machine translation system was created with the sole purpose of improving translation quality on the Basque-to-English language pair. The multilingual system was trained on a combination of in-domain data, pseudo in-domain data obtained via cross-entropy data selection and backtranslated data. We morphologically segmented Basque text with a novel approach that only requires a dictionary such as those used by spell checkers and proved that this segmentation approach outperforms the widespread byte pair encoding strategy for this task.</abstract>
<identifier type="citekey">sanchez-cartagena-2018-prompsit-</identifier>
<location>
<url>https://aclanthology.org/2018.iwslt-1.14</url>
</location>
<part>
<date encoding="w3cdtf" point="start">2018-10-29</date>
<date encoding="w3cdtf" point="end">2018-10-30</date>
<extent unit="page">
<start>95</start>
<end>103</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Prompsit’s Submission to the IWSLT 2018 Low Resource Machine Translation Task
%A Sánchez-Cartagena, Víctor M.
%S Proceedings of the 15th International Conference on Spoken Language Translation
%D 2018
%8 October 29-30
%I International Conference on Spoken Language Translation
%C Brussels
%F sanchez-cartagena-2018-prompsit-
%X This paper presents Prompsit Language Engineering’s submission to the IWSLT 2018 Low Resource Machine Translation task. Our submission is based on cross-lingual learning: a multilingual neural machine translation system was created with the sole purpose of improving translation quality on the Basque-to-English language pair. The multilingual system was trained on a combination of in-domain data, pseudo in-domain data obtained via cross-entropy data selection and backtranslated data. We morphologically segmented Basque text with a novel approach that only requires a dictionary such as those used by spell checkers and proved that this segmentation approach outperforms the widespread byte pair encoding strategy for this task.
%U https://aclanthology.org/2018.iwslt-1.14
%P 95-103
Markdown (Informal)
[Prompsit’s Submission to the IWSLT 2018 Low Resource Machine Translation Task](https://aclanthology.org/2018.iwslt-1.14) (Sánchez-Cartagena, IWSLT 2018)
ACL