@inproceedings{shurtz-etal-2025-scripts,
title = "When Scripts Diverge: Strengthening Low-Resource Neural Machine Translation Through Phonetic Cross-Lingual Transfer",
author = "Shurtz, Ammon and
Richardson, Christian and
Richardson, Stephen D.",
editor = "Adelani, David Ifeoluwa and
Arnett, Catherine and
Ataman, Duygu and
Chang, Tyler A. and
Gonen, Hila and
Raja, Rahul and
Schmidt, Fabian and
Stap, David and
Wang, Jiayi",
booktitle = "Proceedings of the 5th Workshop on Multilingual Representation Learning (MRL 2025)",
month = nov,
year = "2025",
address = "Suzhuo, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingest-emnlp/2025.mrl-main.22/",
pages = "336--346",
ISBN = "979-8-89176-345-6",
abstract = "Multilingual Neural Machine Translation (MNMT) models enhance translation quality for low-resource languages by exploiting cross-lingual similarities during training{---}a process known as knowledge transfer. This transfer is particularly effective between languages that share lexical or structural features, often enabled by a common orthography. However, languages with strong phonetic and lexical similarities but distinct writing systems experience limited benefits, as the absence of a shared orthography hinders knowledge transfer. To address this limitation, we propose an approach based on phonetic information that enhances token-level alignment across scripts by leveraging transliterations. We systematically evaluate several phonetic transcription techniques and strategies for incorporating phonetic information into NMT models. Our results show that using a shared encoder to process orthographic and phonetic inputs separately consistently yields the best performance for Khmer, Thai, and Lao in both directions with English, and that our custom Cognate-Aware Transliteration (CAT) method consistently improves translation quality over the baseline."
}Markdown (Informal)
[When Scripts Diverge: Strengthening Low-Resource Neural Machine Translation Through Phonetic Cross-Lingual Transfer](https://preview.aclanthology.org/ingest-emnlp/2025.mrl-main.22/) (Shurtz et al., MRL 2025)
ACL