@inproceedings{hayakawa-arase-2020-fine,
title = "Fine-Grained Error Analysis on {E}nglish-to-{J}apanese Machine Translation in the Medical Domain",
author = "Hayakawa, Takeshi and
Arase, Yuki",
booktitle = "Proceedings of the 22nd Annual Conference of the European Association for Machine Translation",
month = nov,
year = "2020",
address = "Lisboa, Portugal",
publisher = "European Association for Machine Translation",
url = "https://aclanthology.org/2020.eamt-1.17",
pages = "155--164",
abstract = "We performed a detailed error analysis in domain-specific neural machine translation (NMT) for the English and Japanese language pair with fine-grained manual annotation. Despite its importance for advancing NMT technologies, research on the performance of domain-specific NMT and non-European languages has been limited. In this study, we designed an error typology based on the error types that were typically generated by NMT systems and might cause significant impact in technical translations: {``}Addition,{''} {``}Omission,{''} {``}Mistranslation,{''} {``}Grammar,{''} and {``}Terminology.{''} The error annotation was targeted to the medical domain and was performed by experienced professional translators specialized in medicine under careful quality control. The annotation detected 4,912 errors on 2,480 sentences, and the frequency and distribution of errors were analyzed. We found that the major errors in NMT were {``}Mistranslation{''} and {``}Terminology{''} rather than {``}Addition{''} and {``}Omission,{''} which have been reported as typical problems of NMT. Interestingly, more errors occurred in documents for professionals compared with those for the general public. The results of our annotation work will be published as a parallel corpus with error labels, which are expected to contribute to developing better NMT models, automatic evaluation metrics, and quality estimation models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hayakawa-arase-2020-fine">
<titleInfo>
<title>Fine-Grained Error Analysis on English-to-Japanese Machine Translation in the Medical Domain</title>
</titleInfo>
<name type="personal">
<namePart type="given">Takeshi</namePart>
<namePart type="family">Hayakawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuki</namePart>
<namePart type="family">Arase</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-nov</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 22nd Annual Conference of the European Association for Machine Translation</title>
</titleInfo>
<originInfo>
<publisher>European Association for Machine Translation</publisher>
<place>
<placeTerm type="text">Lisboa, Portugal</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We performed a detailed error analysis in domain-specific neural machine translation (NMT) for the English and Japanese language pair with fine-grained manual annotation. Despite its importance for advancing NMT technologies, research on the performance of domain-specific NMT and non-European languages has been limited. In this study, we designed an error typology based on the error types that were typically generated by NMT systems and might cause significant impact in technical translations: “Addition,” “Omission,” “Mistranslation,” “Grammar,” and “Terminology.” The error annotation was targeted to the medical domain and was performed by experienced professional translators specialized in medicine under careful quality control. The annotation detected 4,912 errors on 2,480 sentences, and the frequency and distribution of errors were analyzed. We found that the major errors in NMT were “Mistranslation” and “Terminology” rather than “Addition” and “Omission,” which have been reported as typical problems of NMT. Interestingly, more errors occurred in documents for professionals compared with those for the general public. The results of our annotation work will be published as a parallel corpus with error labels, which are expected to contribute to developing better NMT models, automatic evaluation metrics, and quality estimation models.</abstract>
<identifier type="citekey">hayakawa-arase-2020-fine</identifier>
<location>
<url>https://aclanthology.org/2020.eamt-1.17</url>
</location>
<part>
<date>2020-nov</date>
<extent unit="page">
<start>155</start>
<end>164</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Fine-Grained Error Analysis on English-to-Japanese Machine Translation in the Medical Domain
%A Hayakawa, Takeshi
%A Arase, Yuki
%S Proceedings of the 22nd Annual Conference of the European Association for Machine Translation
%D 2020
%8 nov
%I European Association for Machine Translation
%C Lisboa, Portugal
%F hayakawa-arase-2020-fine
%X We performed a detailed error analysis in domain-specific neural machine translation (NMT) for the English and Japanese language pair with fine-grained manual annotation. Despite its importance for advancing NMT technologies, research on the performance of domain-specific NMT and non-European languages has been limited. In this study, we designed an error typology based on the error types that were typically generated by NMT systems and might cause significant impact in technical translations: “Addition,” “Omission,” “Mistranslation,” “Grammar,” and “Terminology.” The error annotation was targeted to the medical domain and was performed by experienced professional translators specialized in medicine under careful quality control. The annotation detected 4,912 errors on 2,480 sentences, and the frequency and distribution of errors were analyzed. We found that the major errors in NMT were “Mistranslation” and “Terminology” rather than “Addition” and “Omission,” which have been reported as typical problems of NMT. Interestingly, more errors occurred in documents for professionals compared with those for the general public. The results of our annotation work will be published as a parallel corpus with error labels, which are expected to contribute to developing better NMT models, automatic evaluation metrics, and quality estimation models.
%U https://aclanthology.org/2020.eamt-1.17
%P 155-164
Markdown (Informal)
[Fine-Grained Error Analysis on English-to-Japanese Machine Translation in the Medical Domain](https://aclanthology.org/2020.eamt-1.17) (Hayakawa & Arase, EAMT 2020)
ACL