% Khalid et al., CHiPSAL 2025 workshop paper on mixed-band telephonic Urdu ASR
% (ACL Anthology ID 2025.chipsal-1.17).
% The url field uses the canonical aclanthology.org address instead of a
% preview-branch mirror, which is not a stable location.
% NOTE(review): editor "Krishna Bal, Bal" is kept exactly as exported; the
% surname split looks suspect (possibly "Bal, Bal Krishna") -- confirm against
% the proceedings front matter before changing it.
@inproceedings{khalid-etal-2025-bridging,
  title     = {Bridging the Bandwidth Gap: A Mixed Band Telephonic {Urdu} {ASR} Approach with Domain Adaptation for Banking Applications},
  author    = {Khalid, Ayesha and
               Adeeba, Farah and
               Sehar, Najm Ul and
               Hussain, Sarmad},
  editor    = {Sarveswaran, Kengatharaiyer and
               Vaidya, Ashwini and
               Krishna Bal, Bal and
               Shams, Sana and
               Thapa, Surendrabikram},
  booktitle = {Proceedings of the First Workshop on Challenges in Processing South Asian Languages ({CHiPSAL} 2025)},
  month     = jan,
  year      = {2025},
  address   = {Abu Dhabi, UAE},
  publisher = {International Committee on Computational Linguistics},
  url       = {https://aclanthology.org/2025.chipsal-1.17/},
  pages     = {172--184},
  abstract  = {The accuracy of Automatic Speech Recognition (ASR) systems is influenced by the quality and context of speech signals, particularly in telephonic environments prone to errors like channel drops and noise, leading to higher Word Error Rates (WER). This paper presents the development of a large vocabulary Urdu ASR system for telephonic speech, based on a corpus of 445 speakers from diverse domains. The corpus, annotated at the sentence level, is used to train and evaluate GMM-HMM and chain Time-Delay Neural Network (TDNN) models on a 10-hour test set. Results show that the TDNN model outperforms GMM-HMM. Mixing narrowband and wideband speech further reduces WER. The test sets are also evaluated for the pre-trained model Whisper for performance comparison. Additionally, system adaptation for the banking domain with a specialized lexicon and language model demonstrates the system{'}s potential for domain-specific applications.},
}
Markdown (Informal)
[Bridging the Bandwidth Gap: A Mixed Band Telephonic Urdu ASR Approach with Domain Adaptation for Banking Applications](https://aclanthology.org/2025.chipsal-1.17/) (Khalid et al., CHiPSAL 2025)
ACL