@inproceedings{khusainova-etal-2021-hierarchical,
    % Braced (not quoted) values per BibTeX best practice; braces nest safely.
    % {Transformer} and {NLP} are brace-protected so sentence-casing styles
    % cannot downcase the proper noun / acronym.
    title = {Hierarchical {Transformer} for Multilingual Machine Translation},
    author = {Khusainova, Albina and
      Khan, Adil and
      Rivera, Ad{\'\i}n Ram{\'\i}rez and
      Romanov, Vitaly},
    booktitle = {Proceedings of the Eighth Workshop on {NLP} for Similar Languages, Varieties and Dialects},
    month = apr,
    year = {2021},
    % Fixed misspelling: "Kiyv" -> "Kyiv".
    address = {Kyiv, Ukraine},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2021.vardial-1.2},
    pages = {12--20},
    abstract = {The choice of parameter sharing strategy in multilingual machine translation models determines how optimally parameter space is used and hence, directly influences ultimate translation quality. Inspired by linguistic trees that show the degree of relatedness between different languages, the new general approach to parameter sharing in multilingual machine translation was suggested recently. The main idea is to use these expert language hierarchies as a basis for multilingual architecture: the closer two languages are, the more parameters they share. In this work, we test this idea using the Transformer architecture and show that despite the success in previous work there are problems inherent to training such hierarchical models. We demonstrate that in case of carefully chosen training strategy the hierarchical architecture can outperform bilingual models and multilingual models with full parameter sharing.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="khusainova-etal-2021-hierarchical">
<titleInfo>
<title>Hierarchical Transformer for Multilingual Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Albina</namePart>
<namePart type="family">Khusainova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adil</namePart>
<namePart type="family">Khan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adín</namePart>
<namePart type="given">Ramírez</namePart>
<namePart type="family">Rivera</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vitaly</namePart>
<namePart type="family">Romanov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-apr</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth Workshop on NLP for Similar Languages, Varieties and Dialects</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Kyiv, Ukraine</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The choice of parameter sharing strategy in multilingual machine translation models determines how optimally parameter space is used and hence, directly influences ultimate translation quality. Inspired by linguistic trees that show the degree of relatedness between different languages, the new general approach to parameter sharing in multilingual machine translation was suggested recently. The main idea is to use these expert language hierarchies as a basis for multilingual architecture: the closer two languages are, the more parameters they share. In this work, we test this idea using the Transformer architecture and show that despite the success in previous work there are problems inherent to training such hierarchical models. We demonstrate that in case of carefully chosen training strategy the hierarchical architecture can outperform bilingual models and multilingual models with full parameter sharing.</abstract>
<identifier type="citekey">khusainova-etal-2021-hierarchical</identifier>
<location>
<url>https://aclanthology.org/2021.vardial-1.2</url>
</location>
<part>
<date>2021-apr</date>
<extent unit="page">
<start>12</start>
<end>20</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Hierarchical Transformer for Multilingual Machine Translation
%A Khusainova, Albina
%A Khan, Adil
%A Rivera, Adín Ramírez
%A Romanov, Vitaly
%S Proceedings of the Eighth Workshop on NLP for Similar Languages, Varieties and Dialects
%D 2021
%8 apr
%I Association for Computational Linguistics
%C Kyiv, Ukraine
%F khusainova-etal-2021-hierarchical
%X The choice of parameter sharing strategy in multilingual machine translation models determines how optimally parameter space is used and hence, directly influences ultimate translation quality. Inspired by linguistic trees that show the degree of relatedness between different languages, the new general approach to parameter sharing in multilingual machine translation was suggested recently. The main idea is to use these expert language hierarchies as a basis for multilingual architecture: the closer two languages are, the more parameters they share. In this work, we test this idea using the Transformer architecture and show that despite the success in previous work there are problems inherent to training such hierarchical models. We demonstrate that in case of carefully chosen training strategy the hierarchical architecture can outperform bilingual models and multilingual models with full parameter sharing.
%U https://aclanthology.org/2021.vardial-1.2
%P 12-20
Markdown (Informal)
[Hierarchical Transformer for Multilingual Machine Translation](https://aclanthology.org/2021.vardial-1.2) (Khusainova et al., VarDial 2021)
ACL