@comment{Paper in the AmericasNLP 2025 workshop proceedings (ACL Anthology ID 2025.americasnlp-1.15). Author names are stored without honorifics; the url is the canonical Anthology address rather than a preview build.}
@inproceedings{yahan-islam-2025-leveraging,
  title     = {Leveraging Large Language Models for {Spanish}-Indigenous Language Machine Translation at {AmericasNLP} 2025},
  author    = {Yahan, Mahshar and
               Islam, Mohammad},
  editor    = {Mager, Manuel and
               Ebrahimi, Abteen and
               Pugh, Robert and
               Rijhwani, Shruti and
               Von Der Wense, Katharina and
               Chiruzzo, Luis and
               Coto-Solano, Rolando and
               Oncevay, Arturo},
  booktitle = {Proceedings of the Fifth Workshop on {NLP} for Indigenous Languages of the {Americas} ({AmericasNLP})},
  month     = may,
  year      = {2025},
  address   = {Albuquerque, New Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.americasnlp-1.15/},
  pages     = {126--133},
  isbn      = {979-8-89176-236-7},
  abstract  = {This paper presents our approach to machine translation between Spanish and 13 Indigenous languages of the Americas as part of the AmericasNLP 2025 shared task. Addressing the challenges of low-resource translation, we fine-tuned advanced multilingual models, including NLLB-200 (Distilled-600M), Llama 3.1 (8B-Instruct) and XGLM 1.7B, using techniques such as dynamic batching, token adjustments, and embedding initialization. Data preprocessing steps like punctuation removal and tokenization refinements were employed to achieve data generalization. While our models demonstrated strong performance for Awajun and Quechua translations, they struggled with morphologically complex languages like Nahuatl and Otom{\'i}. Our approach achieved competitive ChrF++ scores for Awajun (35.16) and Quechua (31.01) in the Spanish-to-Indigenous translation track (Es{\textrightarrow}Xx). Similarly, in the Indigenous-to-Spanish track (Xx{\textrightarrow}Es), we obtained ChrF++ scores of 33.70 for Awajun and 31.71 for Quechua. These results underscore the potential of tailored methodologies in preserving linguistic diversity while advancing machine translation for endangered languages.},
}
Markdown (Informal)
[Leveraging Large Language Models for Spanish-Indigenous Language Machine Translation at AmericasNLP 2025](https://aclanthology.org/2025.americasnlp-1.15/) (Yahan & Islam, AmericasNLP 2025)
ACL