@inproceedings{bergmanis-pinnis-2021-facilitating,
title = "Facilitating Terminology Translation with Target Lemma Annotations",
author = "Bergmanis, Toms and
Pinnis, M{\=a}rcis",
booktitle = "Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume",
month = apr,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.eacl-main.271",
doi = "10.18653/v1/2021.eacl-main.271",
pages = "3105--3111",
abstract = "Most of the recent work on terminology integration in machine translation has assumed that terminology translations are given already inflected in forms that are suitable for the target language sentence. In day-to-day work of professional translators, however, it is seldom the case as translators work with bilingual glossaries where terms are given in their dictionary forms; finding the right target language form is part of the translation process. We argue that the requirement for apriori specified target language forms is unrealistic and impedes the practical applicability of previous work. In this work, we propose to train machine translation systems using a source-side data augmentation method that annotates randomly selected source language words with their target language lemmas. We show that systems trained on such augmented data are readily usable for terminology integration in real-life translation scenarios. Our experiments on terminology translation into the morphologically complex Baltic and Uralic languages show an improvement of up to 7 BLEU points over baseline systems with no means for terminology integration and an average improvement of 4 BLEU points over the previous work. Results of the human evaluation indicate a 47.7{\%} absolute improvement over the previous work in term translation accuracy when translating into Latvian.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bergmanis-pinnis-2021-facilitating">
<titleInfo>
<title>Facilitating Terminology Translation with Target Lemma Annotations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Toms</namePart>
<namePart type="family">Bergmanis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mārcis</namePart>
<namePart type="family">Pinnis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-apr</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Most of the recent work on terminology integration in machine translation has assumed that terminology translations are given already inflected in forms that are suitable for the target language sentence. In day-to-day work of professional translators, however, it is seldom the case as translators work with bilingual glossaries where terms are given in their dictionary forms; finding the right target language form is part of the translation process. We argue that the requirement for apriori specified target language forms is unrealistic and impedes the practical applicability of previous work. In this work, we propose to train machine translation systems using a source-side data augmentation method that annotates randomly selected source language words with their target language lemmas. We show that systems trained on such augmented data are readily usable for terminology integration in real-life translation scenarios. Our experiments on terminology translation into the morphologically complex Baltic and Uralic languages show an improvement of up to 7 BLEU points over baseline systems with no means for terminology integration and an average improvement of 4 BLEU points over the previous work. Results of the human evaluation indicate a 47.7% absolute improvement over the previous work in term translation accuracy when translating into Latvian.</abstract>
<identifier type="citekey">bergmanis-pinnis-2021-facilitating</identifier>
<identifier type="doi">10.18653/v1/2021.eacl-main.271</identifier>
<location>
<url>https://aclanthology.org/2021.eacl-main.271</url>
</location>
<part>
<date>2021-apr</date>
<extent unit="page">
<start>3105</start>
<end>3111</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Facilitating Terminology Translation with Target Lemma Annotations
%A Bergmanis, Toms
%A Pinnis, Mārcis
%S Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume
%D 2021
%8 apr
%I Association for Computational Linguistics
%C Online
%F bergmanis-pinnis-2021-facilitating
%X Most of the recent work on terminology integration in machine translation has assumed that terminology translations are given already inflected in forms that are suitable for the target language sentence. In day-to-day work of professional translators, however, it is seldom the case as translators work with bilingual glossaries where terms are given in their dictionary forms; finding the right target language form is part of the translation process. We argue that the requirement for apriori specified target language forms is unrealistic and impedes the practical applicability of previous work. In this work, we propose to train machine translation systems using a source-side data augmentation method that annotates randomly selected source language words with their target language lemmas. We show that systems trained on such augmented data are readily usable for terminology integration in real-life translation scenarios. Our experiments on terminology translation into the morphologically complex Baltic and Uralic languages show an improvement of up to 7 BLEU points over baseline systems with no means for terminology integration and an average improvement of 4 BLEU points over the previous work. Results of the human evaluation indicate a 47.7% absolute improvement over the previous work in term translation accuracy when translating into Latvian.
%R 10.18653/v1/2021.eacl-main.271
%U https://aclanthology.org/2021.eacl-main.271
%U https://doi.org/10.18653/v1/2021.eacl-main.271
%P 3105-3111
Markdown (Informal)
[Facilitating Terminology Translation with Target Lemma Annotations](https://aclanthology.org/2021.eacl-main.271) (Bergmanis & Pinnis, EACL 2021)
ACL