@comment{
  Workshop paper from the LM4UC 2025 proceedings (Li and Niehues) on
  integrating language-model knowledge into ASR for endangered
  low-resource languages.
  NOTE(review): the url field points at a preview.aclanthology.org ingest
  branch rather than a plain Anthology address -- confirm whether it should
  be replaced with the permanent link once the volume is published.
}
@inproceedings{li-niehues-2025-enhance,
  author    = {Li, Zhaolin and Niehues, Jan},
  editor    = {Nguyen, Duc},
  title     = {Enhance Contextual Learning in {ASR} for Endangered Low-resource Languages},
  booktitle = {Proceedings of the 1st Workshop on Language Models for Underserved Communities (LM4UC 2025)},
  month     = may,
  year      = {2025},
  address   = {Albuquerque, New Mexico},
  publisher = {Association for Computational Linguistics},
  pages     = {1--7},
  isbn      = {979-8-89176-242-8},
  url       = {https://preview.aclanthology.org/Ingest-2025-COMPUTEL/2025.lm4uc-1.1/},
  abstract  = {Automatic Speech Recognition (ASR) facilitates documenting endangered low-resource languages. While recent advances in acoustic modelling have been substantial, contextual learning remains underexplored. This study investigates the main factors that influence the integration of knowledge from language models (LMs) into state-of-the-art ASR models for endangered low-resource languages. Through experiments on five diverse low-resource languages, we find: 1) Fine-grained tokenization effectively improves ASR performance by addressing the prevalent unknown words and improving data usage efficiency; 2) The integration of transformer-based LMs into ASR systems surpasses that of N-gram LMs only in one language, even though they consistently achieve better results in language modelling tasks. 3) ASR performance is highly sensitive to language-specific optimization, as shown by a 43{\%} performance degradation in one language due to parameter transfer across languages. We open-source our scripts to support further research and applications.},
}
Markdown (Informal)
[Enhance Contextual Learning in ASR for Endangered Low-resource Languages](https://preview.aclanthology.org/Ingest-2025-COMPUTEL/2025.lm4uc-1.1/) (Li & Niehues, LM4UC 2025)
ACL