@inproceedings{sarapat-etal-2025-language,
title = "Language Confusion and Multilingual Performance: A Case Study of {T}hai-Adapted Large Language Models",
author = "Sarapat, Pakhapoom and
Ukarapol, Trapoom and
Hashimoto, Tatsunori",
editor = {Sinha, Aman and
V{\'a}zquez, Ra{\'u}l and
Mickus, Timothee and
Agarwal, Rohit and
Buhnila, Ioana and
Schmidtov{\'a}, Patr{\'i}cia and
Gamba, Federica and
Prasad, Dilip K. and
Tiedemann, J{\"o}rg},
booktitle = "Proceedings of the 1st Workshop on Confabulation, Hallucinations and Overgeneration in Multilingual and Practical Settings (CHOMPS 2025)",
month = dec,
year = "2025",
address = "Mumbai, India",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingest-ijcnlp-aacl/2025.chomps-main.5/",
pages = "49--59",
ISBN = "979-8-89176-308-1",
    abstract = "This paper presents a comprehensive study on the multilingual adaptability of large language models (LLMs), with a focus on the interplay between training strategies and prompt design. Using Thai as a case study, we examine: (RQ1) the extent to which pre-trained models (Base) can adapt to another language through additional fine-tuning; (RQ2) how continual pre-training (CPT) compares to multilingual pre-training (MLLM) in terms of performance on downstream tasks; and (RQ3) how language variation within different components of a structured prompt{--}task instruction, context input, and output instruction{--}influences task performance in cross-lingual settings. Our findings reveal that CPT proves to be a promising strategy for enhancing model performance in languages other than English, such as Thai, in monolingual settings, particularly for models that initially lack strong linguistic capabilities. Its effectiveness, however, is highly task-dependent and varies based on the base model{'}s initial proficiency. In cross-lingual scenarios, MLLMs exhibit superior robustness compared to Base and CPT models, which are more susceptible to context-output language mismatches. Considering the high cost of training multilingual models from scratch, MLLMs remain a critical component for downstream tasks in multilingual settings due to their strong cross-lingual performance."
}