% Conference paper in the MT Summit XIX Research Track proceedings (ACL Anthology ID 2023.mtsummit-research.11).
% The url field uses the canonical aclanthology.org address; preview.aclanthology.org/<branch>/ links are staging deployments and not stable.
% NOTE(review): address holds the conference venue, following the ACL Anthology export convention -- confirm against the target style.
@inproceedings{tran-etal-2023-improving,
  title     = {Improving Embedding Transfer for Low-Resource Machine Translation},
  author    = {Tran, Van-Hien and
               Ding, Chenchen and
               Tanaka, Hideki and
               Utiyama, Masao},
  editor    = {Utiyama, Masao and
               Wang, Rui},
  booktitle = {Proceedings of Machine Translation Summit XIX, Vol. 1: Research Track},
  month     = sep,
  year      = {2023},
  address   = {Macau SAR, China},
  publisher = {Asia-Pacific Association for Machine Translation},
  url       = {https://aclanthology.org/2023.mtsummit-research.11/},
  pages     = {123--134},
  abstract  = {Low-resource machine translation (LRMT) poses a substantial challenge due to the scarcity of parallel training data. This paper introduces a new method to improve the transfer of the embedding layer from the Parent model to the Child model in LRMT, utilizing trained token embeddings in the Parent model{'}s high-resource vocabulary. Our approach involves projecting all tokens into a shared semantic space and measuring the semantic similarity between tokens in the low-resource and high-resource languages. These measures are then utilized to initialize token representations in the Child model{'}s low-resource vocabulary. We evaluated our approach on three benchmark datasets of low-resource language pairs: Myanmar-English, Indonesian-English, and Turkish-English. The experimental results demonstrate that our method outperforms previous methods regarding translation quality. Additionally, our approach is computationally efficient, leading to reduced training time compared to prior works.},
}
Markdown (Informal)
[Improving Embedding Transfer for Low-Resource Machine Translation](https://aclanthology.org/2023.mtsummit-research.11/) (Tran et al., MTSummit 2023)
ACL