@inproceedings{imamura-sumita-2021-nict,
    title = "{NICT}-2 Translation System at {WAT}-2021: Applying a Pretrained Multilingual Encoder-Decoder Model to Low-resource Language Pairs",
    author = "Imamura, Kenji and
      Sumita, Eiichiro",
    booktitle = "Proceedings of the 8th Workshop on Asian Translation (WAT2021)",
    month = aug,
    year = "2021",
    address = "Online",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2021.wat-1.8",
    doi = "10.18653/v1/2021.wat-1.8",
    pages = "90--95",
    abstract = "In this paper, we present the NICT system (NICT-2) submitted to the NICT-SAP shared task at the 8th Workshop on Asian Translation (WAT-2021). A feature of our system is that we used a pretrained multilingual BART (Bidirectional and Auto-Regressive Transformer; mBART) model. Because publicly available models do not support some languages in the NICT-SAP task, we added these languages to the mBART model and then trained it using monolingual corpora extracted from Wikipedia. We fine-tuned the expanded mBART model using the parallel corpora specified by the NICT-SAP task. The BLEU scores greatly improved in comparison with those of systems without the pretrained model, including the additional languages.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="imamura-sumita-2021-nict">
    <titleInfo>
      <title>NICT-2 Translation System at WAT-2021: Applying a Pretrained Multilingual Encoder-Decoder Model to Low-resource Language Pairs</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Kenji</namePart>
      <namePart type="family">Imamura</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Eiichiro</namePart>
      <namePart type="family">Sumita</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 8th Workshop on Asian Translation (WAT2021)</title>
      </titleInfo>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In this paper, we present the NICT system (NICT-2) submitted to the NICT-SAP shared task at the 8th Workshop on Asian Translation (WAT-2021). A feature of our system is that we used a pretrained multilingual BART (Bidirectional and Auto-Regressive Transformer; mBART) model. Because publicly available models do not support some languages in the NICT-SAP task, we added these languages to the mBART model and then trained it using monolingual corpora extracted from Wikipedia. We fine-tuned the expanded mBART model using the parallel corpora specified by the NICT-SAP task. The BLEU scores greatly improved in comparison with those of systems without the pretrained model, including the additional languages.</abstract>
    <identifier type="citekey">imamura-sumita-2021-nict</identifier>
    <identifier type="doi">10.18653/v1/2021.wat-1.8</identifier>
    <location>
      <url>https://aclanthology.org/2021.wat-1.8</url>
    </location>
    <part>
      <date>2021-08</date>
      <extent unit="page">
        <start>90</start>
        <end>95</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T NICT-2 Translation System at WAT-2021: Applying a Pretrained Multilingual Encoder-Decoder Model to Low-resource Language Pairs
%A Imamura, Kenji
%A Sumita, Eiichiro
%S Proceedings of the 8th Workshop on Asian Translation (WAT2021)
%D 2021
%8 August
%I Association for Computational Linguistics
%C Online
%F imamura-sumita-2021-nict
%X In this paper, we present the NICT system (NICT-2) submitted to the NICT-SAP shared task at the 8th Workshop on Asian Translation (WAT-2021). A feature of our system is that we used a pretrained multilingual BART (Bidirectional and Auto-Regressive Transformer; mBART) model. Because publicly available models do not support some languages in the NICT-SAP task, we added these languages to the mBART model and then trained it using monolingual corpora extracted from Wikipedia. We fine-tuned the expanded mBART model using the parallel corpora specified by the NICT-SAP task. The BLEU scores greatly improved in comparison with those of systems without the pretrained model, including the additional languages.
%R 10.18653/v1/2021.wat-1.8
%U https://aclanthology.org/2021.wat-1.8
%U https://doi.org/10.18653/v1/2021.wat-1.8
%P 90-95
Markdown (Informal)
[NICT-2 Translation System at WAT-2021: Applying a Pretrained Multilingual Encoder-Decoder Model to Low-resource Language Pairs](https://aclanthology.org/2021.wat-1.8) (Imamura & Sumita, WAT 2021)
ACL
Kenji Imamura and Eiichiro Sumita. 2021. NICT-2 Translation System at WAT-2021: Applying a Pretrained Multilingual Encoder-Decoder Model to Low-resource Language Pairs. In Proceedings of the 8th Workshop on Asian Translation (WAT2021), pages 90–95, Online. Association for Computational Linguistics.
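
The abstract describes a three-step recipe: add the unsupported languages to a public mBART checkpoint, continue training on Wikipedia monolingual data, then fine-tune on the task's parallel corpora. The following is a minimal, hypothetical sketch of the first step only, using the Hugging Face transformers library; it is not the authors' released code, and the language code `fil_XX` is an invented example of a language tag absent from the public `facebook/mbart-large-cc25` checkpoint.

```python
# Hypothetical sketch (not the paper's code): extending a pretrained mBART
# model with a new language-tag token before continued pretraining.
from transformers import MBartForConditionalGeneration, MBartTokenizer

model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-cc25")
tokenizer = MBartTokenizer.from_pretrained("facebook/mbart-large-cc25")

# mBART prefixes/suffixes each sentence with a language-code token (e.g. "en_XX").
# For a language missing from the public checkpoint, register a new code token
# and grow the embedding matrix so the model has a row for it.
new_lang_code = "fil_XX"  # invented example; not a code in mbart-large-cc25
tokenizer.add_tokens([new_lang_code], special_tokens=True)
model.resize_token_embeddings(len(tokenizer))

# The new embedding row is randomly initialized. Per the abstract, it would
# next be trained on monolingual text (e.g. Wikipedia) before fine-tuning
# on the NICT-SAP parallel corpora.
```

Resizing the embeddings is what makes the added language usable at all: without a dedicated row, the new tag would be split into subwords and carry no language signal, which is why the abstract's monolingual-pretraining stage precedes fine-tuning.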