@inproceedings{lee-etal-2020-noising,
title = "Noising Scheme for Data Augmentation in Automatic Post-Editing",
author = "Lee, WonKee and
Shin, Jaehun and
Jung, Baikjin and
Lee, Jihyung and
Lee, Jong-Hyeok",
booktitle = "Proceedings of the Fifth Conference on Machine Translation",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.wmt-1.83",
pages = "783--788",
abstract = "This paper describes POSTECH{'}s submission to WMT20 for the shared task on Automatic Post-Editing (APE). Our focus is on increasing the quantity of available APE data to overcome the shortage of human-crafted training data. In our experiment, we implemented a noising module that simulates four types of post-editing errors, and we introduced this module into a Transformer-based multi-source APE model. Our noising module implants errors into texts on the target side of parallel corpora during the training phase to make synthetic MT outputs, increasing the entire number of training samples. We also generated additional training data using the parallel corpora and NMT model that were released for the Quality Estimation task, and we used these data to train our APE model. Experimental results on the WMT20 English-German APE data set show improvements over the baseline in terms of both the TER and BLEU scores: our primary submission achieved an improvement of -3.15 TER and +4.01 BLEU, and our contrastive submission achieved an improvement of -3.34 TER and +4.30 BLEU.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lee-etal-2020-noising">
<titleInfo>
<title>Noising Scheme for Data Augmentation in Automatic Post-Editing</title>
</titleInfo>
<name type="personal">
<namePart type="given">WonKee</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jaehun</namePart>
<namePart type="family">Shin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Baikjin</namePart>
<namePart type="family">Jung</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jihyung</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jong-Hyeok</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-nov</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Conference on Machine Translation</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes POSTECH’s submission to WMT20 for the shared task on Automatic Post-Editing (APE). Our focus is on increasing the quantity of available APE data to overcome the shortage of human-crafted training data. In our experiment, we implemented a noising module that simulates four types of post-editing errors, and we introduced this module into a Transformer-based multi-source APE model. Our noising module implants errors into texts on the target side of parallel corpora during the training phase to make synthetic MT outputs, increasing the entire number of training samples. We also generated additional training data using the parallel corpora and NMT model that were released for the Quality Estimation task, and we used these data to train our APE model. Experimental results on the WMT20 English-German APE data set show improvements over the baseline in terms of both the TER and BLEU scores: our primary submission achieved an improvement of -3.15 TER and +4.01 BLEU, and our contrastive submission achieved an improvement of -3.34 TER and +4.30 BLEU.</abstract>
<identifier type="citekey">lee-etal-2020-noising</identifier>
<location>
<url>https://aclanthology.org/2020.wmt-1.83</url>
</location>
<part>
<date>2020-nov</date>
<extent unit="page">
<start>783</start>
<end>788</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Noising Scheme for Data Augmentation in Automatic Post-Editing
%A Lee, WonKee
%A Shin, Jaehun
%A Jung, Baikjin
%A Lee, Jihyung
%A Lee, Jong-Hyeok
%S Proceedings of the Fifth Conference on Machine Translation
%D 2020
%8 nov
%I Association for Computational Linguistics
%C Online
%F lee-etal-2020-noising
%X This paper describes POSTECH’s submission to WMT20 for the shared task on Automatic Post-Editing (APE). Our focus is on increasing the quantity of available APE data to overcome the shortage of human-crafted training data. In our experiment, we implemented a noising module that simulates four types of post-editing errors, and we introduced this module into a Transformer-based multi-source APE model. Our noising module implants errors into texts on the target side of parallel corpora during the training phase to make synthetic MT outputs, increasing the entire number of training samples. We also generated additional training data using the parallel corpora and NMT model that were released for the Quality Estimation task, and we used these data to train our APE model. Experimental results on the WMT20 English-German APE data set show improvements over the baseline in terms of both the TER and BLEU scores: our primary submission achieved an improvement of -3.15 TER and +4.01 BLEU, and our contrastive submission achieved an improvement of -3.34 TER and +4.30 BLEU.
%U https://aclanthology.org/2020.wmt-1.83
%P 783-788
Markdown (Informal)
[Noising Scheme for Data Augmentation in Automatic Post-Editing](https://aclanthology.org/2020.wmt-1.83) (Lee et al., WMT 2020)
ACL