@inproceedings{ji-chen-2025-many,
title = "How Many Languages Make Good Multilingual Instruction Tuning? A Case Study on {BLOOM}",
author = "Ji, Shaoxiong and
Chen, Pinzhen",
editor = "Rambow, Owen and
Wanner, Leo and
Apidianaki, Marianna and
Al-Khalifa, Hend and
Eugenio, Barbara Di and
Schockaert, Steven",
booktitle = "Proceedings of the 31st International Conference on Computational Linguistics",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2025.coling-main.175/",
pages = "2575--2581",
abstract = "Instruction tuning a large language model with multiple languages can prepare it for multilingual downstream tasks. Nonetheless, it is yet to be determined whether having a handful of languages is sufficient, or whether the benefits increase with the inclusion of more. By fine-tuning large multilingual models on 1 to 52 languages, we present a case study on BLOOM to understand three pertinent factors affecting performance: the number of languages, language exposure, and similarity between training and test languages. Overall we found that 1) expanding language coverage in multilingual instruction tuning proves to be beneficial; 2) accuracy often significantly boots if the test language appears in the instruction mixture; 3) languages' genetic features correlate with cross-lingual transfer more than merely the number of language but different languages benefit to various degrees."
}