% Workshop paper by Fort and Chelliah in the CustomNLP4U 2026 proceedings.
% Acronyms that must keep their capitalisation under sentence-casing styles
% are protected as whole-word brace groups ({ASR}, {NLP}, {CustomNLP4U}).
% NOTE(review): the url field points at a "preview" host on an ingest path;
% presumably it should be swapped for the canonical address once the volume
% is published -- confirm before relying on it.
@inproceedings{fort-chelliah-2026-customizing,
  author    = {Fort, Alexandra and
               Chelliah, Shobhana Lakshmi},
  title     = {Customizing {ASR} for Language Documentation and Resource Prioritization},
  editor    = {Mysore, Sheshera and
               Kumar, Sachin and
               Balachandran, Vidhisha and
               Hayati, Shirley Anugrah and
               Brahman, Faeze and
               Moussa, Hanane Nour and
               Salemi, Alireza},
  booktitle = {Proceedings of the Second Workshop on Customizable {NLP}: Progress and Challenges in Customizing {NLP} for a Domain, Application, Group, or Individual ({CustomNLP4U})},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {149--159},
  isbn      = {979-8-89176-396-8},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.customnlp4u-1.13/},
  abstract  = {Research in language documentation has the potential to benefit from integration of ASR models, especially through the assisted transcription of recordings with audio. Recent advancements in ASR for low-resource languages demonstrate the ability to adapt general, multilingual models for unseen languages with limited fine-tuning data, supporting the creation of custom ASR models. However, resources are still required to collect and prepare the fine-tuning data, necessitating exploration of optimization of resource allocation within the process of data collection and preparation. This paper outlines important considerations for the collection and preparation of data for customizing an ASR model for use in language documentation projects. With the development of a Lamkang ASR model as an example, prioritization of tasks within a language documentation project is outlined by analyzing the relative impact of time spent on transcription correction versus time spent on manual alignment on ASR model performance. Results from this research suggest prioritization of transcription correction over manual-alignment of data and suggest fine-tuning multilingual ASR systems produces superior results to zero-shot ASR models, despite recent advancements in the technology.},
}
Markdown (Informal)
[Customizing ASR for Language Documentation and Resource Prioritization](https://preview.aclanthology.org/ingest-acl-workshops/2026.customnlp4u-1.13/) (Fort & Chelliah, CustomNLP4U 2026)
ACL
- Alexandra Fort and Shobhana Lakshmi Chelliah. 2026. Customizing ASR for Language Documentation and Resource Prioritization. In Proceedings of the Second Workshop on Customizable NLP: Progress and Challenges in Customizing NLP for a Domain, Application, Group, or Individual (CustomNLP4U), pages 149–159, San Diego, California, USA. Association for Computational Linguistics.