@inproceedings{kumar-etal-2025-tackling,
title = "Tackling Low-Resource {NMT} with Instruction-Tuned {LL}a{MA}: A Study on Kokborok and {B}odo",
author = "Kumar, Deepak and
Singh, Kshetrimayum Boynao and
Ekbal, Asif",
editor = "Haddow, Barry and
Kocmi, Tom and
Koehn, Philipp and
Monz, Christof",
booktitle = "Proceedings of the Tenth Conference on Machine Translation",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingest-emnlp/2025.wmt-1.97/",
pages = "1215--1221",
ISBN = "979-8-89176-341-8",
abstract = "This paper presents a new neural machine translation (NMT) system aimed at low-resource language pairs: English to Kokborok and English to Bodo. The framework leverages the LLaMA3-8B-Instruct model along with LoRA-based parameter-efficient fine-tuning. For translating into Kokborok, the model undergoes an initial continued pre-training phase on a dataset containing 75,000 Kokborok and 25,000 English monolingual sentences, followed by instruction-tuning. This tuning uses a reformulated version of WMT25 dataset, adapted to the Alpaca format to support instructional goals. In the Bodo translation, the model is pre-trained on a more extensive dataset of 350,000 Bodo and 125,000 English sentences, using a similar instruction-tuning approach. LoRA adapters are used to modify the large LLaMA3 model for these low-resource settings. Testing with the WMT25 test dataset reveals modest translation results, highlighting the difficulties in translating for low-resource languages. Translating English to Bodo, the model achieved a BLEU score of 4.38, a TER of 92.5, and a chrF score of 35.4. For English to Kokborok, it yielded scores of 5.59 in chrF, 105.4 in TER, and 0.17 in BLEU. These results underscore the intricacies of the task and highlight the critical need for further data collection, domain-specific adaptations, and improvements in model design to better support underrepresented languages."
}
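
The training recipe summarized in the abstract (Alpaca-format instruction data plus LoRA-based parameter-efficient fine-tuning of LLaMA3-8B-Instruct) can be illustrated with a minimal sketch. The instruction wording, the `to_alpaca_record` helper, and the LoRA hyperparameters below are illustrative assumptions, not values published in the paper.

```python
# Hypothetical sketch: wrap an English->target sentence pair in the Alpaca
# schema and attach a LoRA adapter to LLaMA3-8B-Instruct with PEFT.
# Instruction text and LoRA hyperparameters are assumed, not from the paper.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import LoraConfig, get_peft_model


def to_alpaca_record(src: str, tgt: str, tgt_lang: str) -> dict:
    """Convert one parallel sentence pair into an Alpaca-style record."""
    return {
        "instruction": f"Translate the following English sentence into {tgt_lang}.",
        "input": src,
        "output": tgt,
    }


base_id = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(base_id)
model = AutoModelForCausalLM.from_pretrained(base_id)

# Parameter-efficient fine-tuning: only the low-rank adapter weights are trained,
# which keeps the update footprint small relative to the 8B-parameter base model.
lora_cfg = LoraConfig(
    r=16,                                 # adapter rank (assumed value)
    lora_alpha=32,                        # scaling factor (assumed value)
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],  # attention projections (assumed)
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_cfg)
model.print_trainable_parameters()  # adapters are a small fraction of all weights

example = to_alpaca_record("Good morning.", "<Bodo reference translation>", "Bodo")
```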
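
The BLEU, TER, and chrF figures quoted in the abstract are standard corpus-level metrics as reported by sacreBLEU. A minimal scoring sketch, with toy placeholder strings standing in for the WMT25 system outputs and references, might look like this.

```python
# Hypothetical sketch: corpus-level BLEU, chrF, and TER with sacreBLEU.
# The hypothesis and reference strings are toy placeholders for illustration.
import sacrebleu

hypotheses = ["Toy system translation."]      # one model output per segment
references = [["Toy reference translation."]]  # one or more reference streams

bleu = sacrebleu.corpus_bleu(hypotheses, references)
chrf = sacrebleu.corpus_chrf(hypotheses, references)
ter = sacrebleu.corpus_ter(hypotheses, references)

print(f"BLEU {bleu.score:.2f}  chrF {chrf.score:.1f}  TER {ter.score:.1f}")
```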