@comment{
  Mullov and Waibel, EMNLP 2025 (ACL Anthology ID 2025.emnlp-main.163).
  The url field uses the canonical aclanthology.org address; the exported
  record pointed at a preview-branch deployment of the Anthology site.
  The doi field is the persistent identifier for this paper.
}
@inproceedings{mullov-waibel-2025-shot,
  title     = {Few-Shot Learning Translation from New Languages},
  author    = {Mullov, Carlos and
               Waibel, Alexander},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.emnlp-main.163/},
  doi       = {10.18653/v1/2025.emnlp-main.163},
  pages     = {3309--3330},
  isbn      = {979-8-89176-332-6},
  abstract  = {Recent work shows strong transfer learning capability to unseen languages in sequence-to-sequence neural networks, under the assumption that we have high-quality word representations for the target language. We evaluate whether this direction is a viable path forward for translation from low-resource languages by investigating how much data is required to learn such high-quality word representations. We first show that learning word embeddings separately from a translation model can enable rapid adaptation to new languages with only a few hundred sentences of parallel data. To see whether the current bottleneck in transfer to low-resource languages lies mainly with learning the word representations, we then train word embeddings models on varying amounts of data, to then plug them into a machine translation model. We show that in this simulated low-resource setting with only 500 parallel sentences and 31,250 sentences of monolingual data we can exceed 15 BLEU on Flores on unseen languages. Finally, we investigate why on a real low-resource language the results are less favorable and find fault with the publicly available multilingual language modelling datasets.},
}
Markdown (Informal)
[Few-Shot Learning Translation from New Languages](https://aclanthology.org/2025.emnlp-main.163/) (Mullov & Waibel, EMNLP 2025)
ACL
- Carlos Mullov and Alexander Waibel. 2025. Few-Shot Learning Translation from New Languages. In Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing, pages 3309–3330, Suzhou, China. Association for Computational Linguistics.