% Conference paper (LREC 2026), typed as inproceedings so that the venue,
% editors, publisher and address are rendered by standard styles.
% The source's volume label "main" is an identifier rather than a volume number;
% it is kept in the ignored field anthology-volume so it is not printed.
@inproceedings{arana-etal-2026-multimodal,
  title            = {Multimodal Large Language Models for Low-Resource Languages: A Case Study for {Basque}},
  author           = {Arana, Lukas and
                      Etxaniz, Julen and
                      Salaberria, Ander and
                      Azkune, Gorka},
  editor           = {Piperidis, Stelios and
                      Bel, N{\'u}ria and
                      van den Heuvel, Henk and
                      Ide, Nancy and
                      Krek, Simon and
                      Toral, Antonio},
  booktitle        = {International Conference on Language Resources and Evaluation},
  month            = may,
  year             = {2026},
  address          = {Palma de Mallorca, Spain},
  publisher        = {ELRA Language Resource Association},
  url              = {https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.721/},
  pages            = {9172--9187},
  abstract         = {Current Multimodal Large Language Models exhibit very strong performance for several demanding tasks. While commercial MLLMs deliver acceptable performance in low-resource languages, comparable results remain unattained within the open science community. In this paper, we aim to develop a strong MLLM for a low-resource language, namely Basque. For that purpose, we develop our own training and evaluation image-text datasets, leveraging state-of-the-art translation systems. Using two different Large Language Models as backbones, the Llama-3.1-Instruct model and a Basque-adapted variant called Latxa, we explore several data mixtures for training, encompassing Basque and English languages for both multimodal and text-only data. Evaluating our MLLMs for close-ended and open-ended generation tasks, we show that: i) low ratios of Basque multimodal data (around 20{\%}) are already enough to obtain solid results on Basque benchmarks, and ii) contrary to expected, a Basque instructed backbone LLM is not required to obtain a strong MLLM in Basque. Additionally, we specify the optimal data mixture strategy, the effects of multimodal data in text-only tasks, and analyze evaluation approaches for open-ended generation tasks. Our results pave the way to develop MLLMs for other low-resource languages by openly releasing our resources.},
  anthology-volume = {main},
}
Markdown (Informal)
[Multimodal Large Language Models for Low-Resource Languages: A Case Study for Basque](https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.721/) (Arana et al., LREC 2026)
ACL