@comment{Findings of NAACL 2025 paper, ACL Anthology ID 2025.findings-naacl.373.
  The url field is the permanent ACL Anthology landing page, not a preview build.
  The address field carries the conference location, as used by the ACL-style citation.}
@inproceedings{purason-etal-2025-llms,
    author    = {Purason, Taido and
                 Kuulmets, Hele-Andra and
                 Fishel, Mark},
    title     = {{LLMs} for Extremely Low-Resource {Finno-Ugric} Languages},
    editor    = {Chiruzzo, Luis and
                 Ritter, Alan and
                 Wang, Lu},
    booktitle = {Findings of the Association for Computational Linguistics: NAACL 2025},
    month     = apr,
    year      = {2025},
    address   = {Albuquerque, New Mexico},
    publisher = {Association for Computational Linguistics},
    pages     = {6677--6697},
    isbn      = {979-8-89176-195-7},
    url       = {https://aclanthology.org/2025.findings-naacl.373/},
    abstract  = {The advancement of large language models (LLMs) has predominantly focused on high-resource languages, leaving low-resource languages, such as those in the Finno-Ugric family, significantly underrepresented. This paper addresses this gap by focusing on V{\~o}ro, Livonian, and Komi. We cover almost the entire cycle of LLM creation, from data collection to instruction tuning and evaluation. Our contributions include developing multilingual base and instruction-tuned models; creating evaluation benchmarks, including the smugri-MT-bench multi-turn conversational benchmark; and conducting human evaluation. We intend for this work to promote linguistic diversity, ensuring that lesser-resourced languages can benefit from advancements in NLP.},
}
Markdown (Informal)
[LLMs for Extremely Low-Resource Finno-Ugric Languages](https://aclanthology.org/2025.findings-naacl.373/) (Purason et al., Findings 2025)
ACL
- Taido Purason, Hele-Andra Kuulmets, and Mark Fishel. 2025. LLMs for Extremely Low-Resource Finno-Ugric Languages. In Findings of the Association for Computational Linguistics: NAACL 2025, pages 6677–6697, Albuquerque, New Mexico. Association for Computational Linguistics.