% Conference paper record. The company name in the title is brace-protected
% so that sentence-casing bibliography styles keep its capital letters,
% matching the protection already applied to the WMT acronym.
@inproceedings{ailem-etal-2021-lingua,
    title     = {{Lingua Custodia}'s Participation at the {WMT} 2021 Machine Translation Using Terminologies Shared Task},
    author    = {Ailem, Melissa and
                 Liu, Jingshu and
                 Qader, Raheel},
    booktitle = {Proceedings of the Sixth Conference on Machine Translation},
    month     = nov,
    year      = {2021},
    address   = {Online},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2021.wmt-1.78},
    pages     = {799--803},
    abstract  = {This paper describes Lingua Custodia{'}s submission to the WMT21 shared task on machine translation using terminologies. We consider three directions, namely English to French, Russian, and Chinese. We rely on a Transformer-based architecture as a building block, and we explore a method which introduces two main changes to the standard procedure to handle terminologies. The first one consists in augmenting the training data in such a way as to encourage the model to learn a copy behavior when it encounters terminology constraint terms. The second change is constraint token masking, whose purpose is to ease copy behavior learning and to improve model generalization. Empirical results show that our method satisfies most terminology constraints while maintaining high translation quality.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS collection holding a single record (citekey ailem-etal-2021-lingua). -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="ailem-etal-2021-lingua">
    <titleInfo>
      <title>Lingua Custodia’s Participation at the WMT 2021 Machine Translation Using Terminologies Shared Task</title>
    </titleInfo>
    <!-- Authors, in citation order. -->
    <name type="personal">
      <namePart type="given">Melissa</namePart>
      <namePart type="family">Ailem</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jingshu</namePart>
      <namePart type="family">Liu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Raheel</namePart>
      <namePart type="family">Qader</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <!-- Year-month in W3CDTF form (YYYY-MM) so the value is machine-parseable. -->
      <dateIssued encoding="w3cdtf">2021-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume that contains this paper. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Sixth Conference on Machine Translation</title>
      </titleInfo>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper describes Lingua Custodia’s submission to the WMT21 shared task on machine translation using terminologies. We consider three directions, namely English to French, Russian, and Chinese. We rely on a Transformer-based architecture as a building block, and we explore a method which introduces two main changes to the standard procedure to handle terminologies. The first one consists in augmenting the training data in such a way as to encourage the model to learn a copy behavior when it encounters terminology constraint terms. The second change is constraint token masking, whose purpose is to ease copy behavior learning and to improve model generalization. Empirical results show that our method satisfies most terminology constraints while maintaining high translation quality.</abstract>
    <identifier type="citekey">ailem-etal-2021-lingua</identifier>
    <location>
      <url>https://aclanthology.org/2021.wmt-1.78</url>
    </location>
    <!-- Position of the paper inside the host volume. -->
    <part>
      <date encoding="w3cdtf">2021-11</date>
      <extent unit="page">
        <start>799</start>
        <end>803</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%A Ailem, Melissa
%A Liu, Jingshu
%A Qader, Raheel
%T Lingua Custodia’s Participation at the WMT 2021 Machine Translation Using Terminologies Shared Task
%S Proceedings of the Sixth Conference on Machine Translation
%D 2021
%8 nov
%C Online
%I Association for Computational Linguistics
%P 799-803
%U https://aclanthology.org/2021.wmt-1.78
%F ailem-etal-2021-lingua
%X This paper describes Lingua Custodia’s submission to the WMT21 shared task on machine translation using terminologies. We consider three directions, namely English to French, Russian, and Chinese. We rely on a Transformer-based architecture as a building block, and we explore a method which introduces two main changes to the standard procedure to handle terminologies. The first one consists in augmenting the training data in such a way as to encourage the model to learn a copy behavior when it encounters terminology constraint terms. The second change is constraint token masking, whose purpose is to ease copy behavior learning and to improve model generalization. Empirical results show that our method satisfies most terminology constraints while maintaining high translation quality.
Markdown (Informal)
[Lingua Custodia’s Participation at the WMT 2021 Machine Translation Using Terminologies Shared Task](https://aclanthology.org/2021.wmt-1.78) (Ailem et al., WMT 2021)
ACL