@inproceedings{setiawan-etal-2026-context,
title = "Context Volume Drives Performance: Tackling Domain Shift in Extremely Low-Resource Translation via {RAG}",
author = "Setiawan, David Samuel and
Merx, Raphael and
Lau, Jey Han",
editor = "Ojha, Atul Kr. and
Liu, Chao-hong and
Vylomova, Ekaterina and
Pirinen, Flammie and
Washington, Jonathan and
Oco, Nathaniel and
Zhao, Xiaobing",
booktitle = "Proceedings for the Ninth Workshop on Technologies for Machine Translation of Low Resource Languages ({L}o{R}es{MT} 2026)",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/manual-author-scripts/2026.loresmt-1.7/",
pages = "87--101",
ISBN = "979-8-89176-366-1",
abstract = "Neural Machine Translation (NMT) models for low-resource languages suffer significant performance degradation under domain shift. We quantify this challenge using **Dhao**, an indigenous language of Eastern Indonesia with no digital footprint beyond the New Testament (NT). When applied to the unseen Old Testament (OT), a standard NMT model fine-tuned on the NT drops from an in-domain score of 36.17 chrF++ to 27.11 chrF++. To recover this loss, we introduce a **hybrid framework** where a fine-tuned NMT model generates an initial draft, which is then refined by a Large Language Model (LLM) using Retrieval-Augmented Generation (RAG). The final system achieves 35.21 chrF++ ($+8.10$ recovery), effectively matching the original in-domain quality. Our analysis reveals that this performance is driven primarily by the **number of retrieved examples** rather than the choice of retrieval algorithm. Qualitative analysis confirms the LLM acts as a robust ``safety net,'' repairing severe failures in zero-shot domains."
}