@inproceedings{lim-etal-2024-modularized,
title = "Modularized Multilingual {NMT} with Fine-grained Interlingua",
author = "Lim, Sungjun and
Choi, Yoonjung and
Kim, Sangha",
editor = "Duh, Kevin and
Gomez, Helena and
Bethard, Steven",
booktitle = "Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2024.naacl-long.328/",
doi = "10.18653/v1/2024.naacl-long.328",
pages = "5884--5899",
abstract = "Recently, one popular alternative in Multilingual NMT (MNMT) is modularized MNMT that has both language-specific encoders and decoders. However, due to the absence of layer-sharing, the modularized MNMT failed to produce satisfactory language-independent (Interlingua) features, leading to performance degradation in zero-shot translation. To address this issue, a solution was proposed to share the top of language-specific encoder layers, enabling the successful generation of interlingua features. Nonetheless, it should be noted that this sharing structure does not guarantee the explicit propagation of language-specific features to their respective language-specific decoders. Consequently, to overcome this challenge, we present our modularized MNMT approach, where a modularized encoder is divided into three distinct encoder modules based on different sharing criteria: (1) source language-specific ($Enc_{s}$); (2) universal ($Enc_{all}$); (3) target language-specific ($Enc_{t}$). By employing these sharing strategies, $Enc_{all}$ propagates the interlingua features, after which $Enc_{t}$ propagates the target language-specific features to the language-specific decoders. Additionally, we suggest the Denoising Bi-path Autoencoder (DBAE) to fortify the Denoising Autoencoder (DAE) by leveraging $Enc_{t}$. For experimental purposes, our training corpus comprises both En-to-Any and Any-to-En directions. We adjust the size of our corpus to simulate both balanced and unbalanced settings. Our method demonstrates an improved average BLEU score by ``+2.90'' in En-to-Any directions and by ``+3.06'' in zero-shot compared to other MNMT baselines."
}
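
The abstract describes routing each source sentence through a source language-specific encoder, a shared universal encoder, and a target language-specific encoder before the language-specific decoder. As a rough illustration only (not the authors' code), a minimal PyTorch sketch of that routing might look like the following; all class names, layer counts, and dimensions are assumptions, and the paper's DBAE training objective is omitted.

import torch
import torch.nn as nn


class ModularEncoder(nn.Module):
    """Hypothetical sketch of the three-part encoder the abstract describes:
    Enc_s (per source language), Enc_all (shared), Enc_t (per target language)."""

    def __init__(self, langs, d_model=512, nhead=8, layers_per_module=2):
        super().__init__()

        def stack():
            layer = nn.TransformerEncoderLayer(d_model, nhead, batch_first=True)
            return nn.TransformerEncoder(layer, num_layers=layers_per_module)

        # (1) source language-specific bottom module (Enc_s)
        self.enc_s = nn.ModuleDict({l: stack() for l in langs})
        # (2) universal middle module shared by all directions (Enc_all),
        #     intended to carry the interlingua features
        self.enc_all = stack()
        # (3) target language-specific top module (Enc_t), feeding the
        #     matching language-specific decoder
        self.enc_t = nn.ModuleDict({l: stack() for l in langs})

    def forward(self, x, src_lang, tgt_lang):
        h = self.enc_s[src_lang](x)      # source-specific features
        h = self.enc_all(h)              # language-independent features
        return self.enc_t[tgt_lang](h)   # target-specific features


# Usage: route an En->De batch through the En-specific, shared, and
# De-specific modules (batch of 4, sequence length 16, model dim 512).
enc = ModularEncoder(langs=["en", "de", "fr"])
out = enc(torch.randn(4, 16, 512), src_lang="en", tgt_lang="de")

Selecting the encoder modules by (src_lang, tgt_lang) at call time is what lets a single shared Enc_all serve every direction while Enc_s and Enc_t stay per-language, which is the sharing structure the abstract credits for both interlingua formation and explicit target-specific propagation.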