@inproceedings{marchisio-etal-2020-unsupervised,
title = "When Does Unsupervised Machine Translation Work?",
author = "Marchisio, Kelly and
Duh, Kevin and
Koehn, Philipp",
booktitle = "Proceedings of the Fifth Conference on Machine Translation",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.wmt-1.68",
pages = "571--583",
abstract = "Despite the reported success of unsupervised machine translation (MT), the field has yet to examine the conditions under which the methods succeed and fail. We conduct an extensive empirical evaluation using dissimilar language pairs, dissimilar domains, and diverse datasets. We find that performance rapidly deteriorates when source and target corpora are from different domains, and that stochasticity during embedding training can dramatically affect downstream results. We additionally find that unsupervised MT performance declines when source and target languages use different scripts, and observe very poor performance on authentic low-resource language pairs. We advocate for extensive empirical evaluation of unsupervised MT systems to highlight failure points and encourage continued research on the most promising paradigms. We release our preprocessed dataset to encourage evaluations that stress-test systems under multiple data conditions.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="marchisio-etal-2020-unsupervised">
<titleInfo>
<title>When Does Unsupervised Machine Translation Work?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kelly</namePart>
<namePart type="family">Marchisio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Duh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Koehn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-nov</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Conference on Machine Translation</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Despite the reported success of unsupervised machine translation (MT), the field has yet to examine the conditions under which the methods succeed and fail. We conduct an extensive empirical evaluation using dissimilar language pairs, dissimilar domains, and diverse datasets. We find that performance rapidly deteriorates when source and target corpora are from different domains, and that stochasticity during embedding training can dramatically affect downstream results. We additionally find that unsupervised MT performance declines when source and target languages use different scripts, and observe very poor performance on authentic low-resource language pairs. We advocate for extensive empirical evaluation of unsupervised MT systems to highlight failure points and encourage continued research on the most promising paradigms. We release our preprocessed dataset to encourage evaluations that stress-test systems under multiple data conditions.</abstract>
<identifier type="citekey">marchisio-etal-2020-unsupervised</identifier>
<location>
<url>https://aclanthology.org/2020.wmt-1.68</url>
</location>
<part>
<date>2020-nov</date>
<extent unit="page">
<start>571</start>
<end>583</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T When Does Unsupervised Machine Translation Work?
%A Marchisio, Kelly
%A Duh, Kevin
%A Koehn, Philipp
%S Proceedings of the Fifth Conference on Machine Translation
%D 2020
%8 nov
%I Association for Computational Linguistics
%C Online
%F marchisio-etal-2020-unsupervised
%X Despite the reported success of unsupervised machine translation (MT), the field has yet to examine the conditions under which the methods succeed and fail. We conduct an extensive empirical evaluation using dissimilar language pairs, dissimilar domains, and diverse datasets. We find that performance rapidly deteriorates when source and target corpora are from different domains, and that stochasticity during embedding training can dramatically affect downstream results. We additionally find that unsupervised MT performance declines when source and target languages use different scripts, and observe very poor performance on authentic low-resource language pairs. We advocate for extensive empirical evaluation of unsupervised MT systems to highlight failure points and encourage continued research on the most promising paradigms. We release our preprocessed dataset to encourage evaluations that stress-test systems under multiple data conditions.
%U https://aclanthology.org/2020.wmt-1.68
%P 571-583
Markdown (Informal)
[When Does Unsupervised Machine Translation Work?](https://aclanthology.org/2020.wmt-1.68) (Marchisio et al., WMT 2020)
ACL