@inproceedings{wang-etal-2018-denoising,
    title     = {Denoising Neural Machine Translation Training with Trusted Data and Online Data Selection},
    author    = {Wang, Wei and
                 Watanabe, Taro and
                 Hughes, Macduff and
                 Nakagawa, Tetsuji and
                 Chelba, Ciprian},
    booktitle = {Proceedings of the Third Conference on Machine Translation: Research Papers},
    month     = oct,
    year      = {2018},
    address   = {Brussels, Belgium},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/W18-6314},
    doi       = {10.18653/v1/W18-6314},
    pages     = {133--143},
    abstract  = {Measuring domain relevance of data and identifying or selecting well-fit domain data for machine translation (MT) is a well-studied topic, but denoising is not yet. Denoising is concerned with a different type of data quality and tries to reduce the negative impact of data noise on MT training, in particular, neural MT (NMT) training. This paper generalizes methods for measuring and selecting data for domain MT and applies them to denoising NMT training. The proposed approach uses trusted data and a denoising curriculum realized by online data selection. Intrinsic and extrinsic evaluations of the approach show its significant effectiveness for NMT to train on data with severe noise.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wang-etal-2018-denoising">
<titleInfo>
<title>Denoising Neural Machine Translation Training with Trusted Data and Online Data Selection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Taro</namePart>
<namePart type="family">Watanabe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Macduff</namePart>
<namePart type="family">Hughes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tetsuji</namePart>
<namePart type="family">Nakagawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ciprian</namePart>
<namePart type="family">Chelba</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Conference on Machine Translation: Research Papers</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Brussels, Belgium</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Measuring domain relevance of data and identifying or selecting well-fit domain data for machine translation (MT) is a well-studied topic, but denoising is not yet. Denoising is concerned with a different type of data quality and tries to reduce the negative impact of data noise on MT training, in particular, neural MT (NMT) training. This paper generalizes methods for measuring and selecting data for domain MT and applies them to denoising NMT training. The proposed approach uses trusted data and a denoising curriculum realized by online data selection. Intrinsic and extrinsic evaluations of the approach show its significant effectiveness for NMT to train on data with severe noise.</abstract>
<identifier type="citekey">wang-etal-2018-denoising</identifier>
<identifier type="doi">10.18653/v1/W18-6314</identifier>
<location>
<url>https://aclanthology.org/W18-6314</url>
</location>
<part>
<date>2018-10</date>
<extent unit="page">
<start>133</start>
<end>143</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Denoising Neural Machine Translation Training with Trusted Data and Online Data Selection
%A Wang, Wei
%A Watanabe, Taro
%A Hughes, Macduff
%A Nakagawa, Tetsuji
%A Chelba, Ciprian
%S Proceedings of the Third Conference on Machine Translation: Research Papers
%D 2018
%8 oct
%I Association for Computational Linguistics
%C Brussels, Belgium
%F wang-etal-2018-denoising
%X Measuring domain relevance of data and identifying or selecting well-fit domain data for machine translation (MT) is a well-studied topic, but denoising is not yet. Denoising is concerned with a different type of data quality and tries to reduce the negative impact of data noise on MT training, in particular, neural MT (NMT) training. This paper generalizes methods for measuring and selecting data for domain MT and applies them to denoising NMT training. The proposed approach uses trusted data and a denoising curriculum realized by online data selection. Intrinsic and extrinsic evaluations of the approach show its significant effectiveness for NMT to train on data with severe noise.
%R 10.18653/v1/W18-6314
%U https://aclanthology.org/W18-6314
%U https://doi.org/10.18653/v1/W18-6314
%P 133-143
Markdown (Informal)
[Denoising Neural Machine Translation Training with Trusted Data and Online Data Selection](https://aclanthology.org/W18-6314) (Wang et al., 2018)
ACL