@inproceedings{holmstrom-etal-2023-bridging,
title = "Bridging the Resource Gap: Exploring the Efficacy of {E}nglish and Multilingual {LLM}s for {S}wedish",
author = {Holmstr{\"o}m, Oskar and
Kunz, Jenny and
Kuhlmann, Marco},
editor = "Ilinykh, Nikolai and
Morger, Felix and
Dann{\'e}lls, Dana and
Dobnik, Simon and
Megyesi, Be{\'a}ta and
Nivre, Joakim",
booktitle = "Proceedings of the Second Workshop on Resources and Representations for Under-Resourced Languages and Domains (RESOURCEFUL-2023)",
month = may,
year = "2023",
address = "T{\'o}rshavn, the Faroe Islands",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2023.resourceful-1.13/",
pages = "92--110",
abstract = "Large language models (LLMs) have substantially improved natural language processing (NLP) performance, but training these models from scratch is resource-intensive and challenging for smaller languages. With this paper, we want to initiate a discussion on the necessity of language-specific pre-training of LLMs.We propose how the {\textquotedblleft}one model-many models{\textquotedblright} conceptual framework for task transfer can be applied to language transfer and explore this approach by evaluating the performance of non-Swedish monolingual and multilingual models' performance on tasks in Swedish.Our findings demonstrate that LLMs exposed to limited Swedish during training can be highly capable and transfer competencies from English off-the-shelf, including emergent abilities such as mathematical reasoning, while at the same time showing distinct culturally adapted behaviour. Our results suggest that there are resourceful alternatives to language-specific pre-training when creating useful LLMs for small languages."
}