@inproceedings{kumar-etal-2026-srcmix,
    title     = {{SrcMix}: Mixing of Related Source Languages Benefits Extremely Low-resource Machine Translation},
    author    = {Kumar, Sanjeev and
                 Jyothi, Preethi and
                 Bhattacharyya, Pushpak},
    editor    = {Demberg, Vera and
                 Inui, Kentaro and
                 Marquez, Llu{\'i}s},
    booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {EACL} 2026},
    month     = mar,
    year      = {2026},
    address   = {Rabat, Morocco},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2026.findings-eacl.332/},
    pages     = {6306--6323},
    isbn      = {979-8-89176-386-9},
    abstract  = {Multilingual models are widely used for machine translation (MT). However, their effectiveness for extremely low-resource languages (ELRLs) depends critically on how related languages are incorporated during fine-tuning. In this work, we study the role of language mixing directionality, linguistic relatedness, and script compatibility in ELRL translation. We propose SrcMix, a simple source-side mixing strategy that combines related ELRLs during fine-tuning while constraining the decoder to a single target language. Compared to its target-side counterpart TgtMix, SrcMix improves performance by +3 ChrF++ and +5 BLEU in high-resource to ELRL translations, and by +5 ChrF++ and +12 BLEU in mid-resource to ELRL translations. We also release the first Angika MT dataset and provide a systematic comparison of LLM (Aya-101) and NMT (mT5-Large) models under ELRL settings, highlighting the importance of directional mixing and linguistic compatibility.},
}

Markdown (Informal)
[SrcMix: Mixing of Related Source Languages Benefits Extremely Low-resource Machine Translation](https://aclanthology.org/2026.findings-eacl.332/) (Kumar et al., Findings 2026)
ACL