@comment{
  Conference paper in the NoDaLiDa/Baltic-HLT 2025 proceedings (pages 340--353).
  The url field uses the canonical ACL Anthology address for paper id
  2025.nodalida-1.37; preview-branch addresses are temporary and must not be
  stored here. Words in the title that must keep their capitalisation under
  sentence-casing styles are brace-protected as whole words.
}
@inproceedings{kuulmets-etal-2025-well,
  title     = {How Well do {LLMs} know {Finno-Ugric} Languages? {A} Systematic Assessment},
  author    = {Kuulmets, Hele-Andra and
               Purason, Taido and
               Fishel, Mark},
  editor    = {Johansson, Richard and
               Stymne, Sara},
  booktitle = {Proceedings of the Joint 25th Nordic Conference on Computational Linguistics and 11th Baltic Conference on Human Language Technologies (NoDaLiDa/Baltic-HLT 2025)},
  month     = mar,
  year      = {2025},
  address   = {Tallinn, Estonia},
  publisher = {University of Tartu Library},
  url       = {https://aclanthology.org/2025.nodalida-1.37/},
  pages     = {340--353},
  isbn      = {978-9908-53-109-0},
  abstract  = {We present a systematic evaluation of multilingual capabilities of open large language models (LLMs), specifically focusing on five Finno-Ugric (FiU) languages. Our investigation covers multiple prompting strategies across several benchmarks and reveals that Llama-2 7B and Llama-2 13B perform weakly on most FiU languages. In contrast, Llama 3.1 models show impressive improvements, even for extremely low-resource languages such as V{\~o}ro and Komi, indicating successful cross-lingual knowledge transfer inside the models. Finally, we show that stronger base models outperform weaker, language-adapted models, thus emphasizing the importance of base model in successful language adaptation.},
}
Markdown (Informal)
[How Well do LLMs know Finno-Ugric Languages? A Systematic Assessment](https://aclanthology.org/2025.nodalida-1.37/) (Kuulmets et al., NoDaLiDa 2025)
ACL
- Hele-Andra Kuulmets, Taido Purason, and Mark Fishel. 2025. How Well do LLMs know Finno-Ugric Languages? A Systematic Assessment. In Proceedings of the Joint 25th Nordic Conference on Computational Linguistics and 11th Baltic Conference on Human Language Technologies (NoDaLiDa/Baltic-HLT 2025), pages 340–353, Tallinn, Estonia. University of Tartu Library.