% Workshop paper (ComputEL-9, 2026) comparing the Wav2Vec2 and Whisper ASR architectures.
% The language name in the title is brace-protected word by word so that
% sentence-casing bibliography styles keep its capital letters.
% NOTE(review): the url field points at a preview/ingest host; confirm the
% final Anthology URL (and add a doi if one is assigned) before publishing.
@inproceedings{kriukova-etal-2026-choosing,
  title     = {Choosing an {ASR} model for {D{\"e}n{\"e}} {S{\k{u}}{\l}{\i}n{\'e}}: Navigating polysynthesis and unstandardized orthography},
  author    = {Kriukova, Olga and
               Arppe, Antti and
               Lovick, Olga},
  editor    = {Agyapong, Godfred and
               Moeller, Sarah and
               Arppe, Antti and
               Marashian, Ali and
               Rosenblum, Daisy},
  booktitle = {Proceedings of the Ninth Workshop on the Use of Computational Methods in the Study of Endangered Languages ({ComputEL}-9)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.computel-1.3/},
  pages     = {15--25},
  isbn      = {979-8-89176-422-4},
  abstract  = {While several pre-trained multilingual models are actively used for fine-tuning on under-resourced and endangered languages, it remains unclear which architectures perform better and what factors explain their varying performance across languages. Although this question may be less pressing for languages with adequate resources, it is critical for endangered language communities, where limited time and funding to experiment with multiple model options are available (Jimerson et al., 2023). We compare the performance of two ASR architectures, Wav2Vec2 and Whisper, on a D{\"e}n{\"e} S{\k{u}}{\l}{\i}n{\'e} dataset. This language and dataset present several challenges common to under-resourced and endangered languages: unstandardized orthography, pronunciation variation, and phonological and morphosyntactic structures that differ from the major languages represented in the multilingual datasets used for pre-training large ASR models. Although Wav2Vec2 reportedly outperforms Whisper in low-resource settings (see e.g., Coto-Solano et al., 2024; Nahabwe et al., 2025; Williams et al., 2023), our study shows that Whisper yields significantly better results on the D{\"e}n{\"e} S{\k{u}}{\l}{\i}n{\'e} dataset. These findings suggest that model performance may depend not only on architecture, dataset size, or typological features of language, but also on dataset-specific characteristics. In our case, Whisper showed better adaptability to a dataset with inconsistent spelling and pronunciation. Further verification across similarly inconsistent datasets is required to assess the generalizability of this result.},
}
Markdown (Informal)
[Choosing an ASR model for Dënë Sųłıné: Navigating polysynthesis and unstandardized orthography](https://preview.aclanthology.org/ingest-acl-workshops/2026.computel-1.3/) (Kriukova et al., ComputEL 2026)
ACL