% Workshop paper on ASR model performance for Croatian and Serbian.
% - title: the acronym and the language names are brace-protected as whole
%   words so that sentence-casing styles keep their capitals.
% - month: uses the predefined, unquoted macro so styles can localise it.
% - NOTE(review): url points at a preview.aclanthology.org branch build
%   ("manual-author-scripts"); presumably temporary -- replace with the
%   canonical landing page (or add a doi) once confirmed.
% - NOTE(review): address appears to hold the conference venue rather than
%   the publisher's city; kept as exported -- confirm against the target style.
@inproceedings{samardzic-etal-2026-regional,
  title     = {Regional Variation in the Performance of {ASR} Models on {Croatian} and {Serbian}},
  author    = {Samard{\v{z}}i{\'c}, Tanja and
               Rupnik, Peter and
               Ljube{\v{s}}i{\'c}, Nikola},
  booktitle = {Proceedings of the 13th Workshop on {NLP} for Similar Languages, Varieties and Dialects},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/manual-author-scripts/2026.vardial-1.20/},
  pages     = {242--249},
  abstract  = {Regional variation was a limiting factor for automatic speech recognition (ASR) before large language models. With the new technology, speech processing becomes more general, which opens the question of how to use data in similar languages such as Croatian and Serbian. In this paper, we analyse model performance in the currently available train-test scenarios with the goal of better understanding the mutual interference of these two languages. Our findings suggest that better performing models are not very sensitive to the regional variation. Training from scratch in one of the languages can give good results on both of them, while fine-tuning large pre-trained multilingual models on smaller data sets does not give the expected results.},
}

Markdown (Informal)
[Regional Variation in the Performance of ASR Models on Croatian and Serbian](https://preview.aclanthology.org/manual-author-scripts/2026.vardial-1.20/) (Samardžić et al., VarDial 2026)
ACL