% Sehar et al., CHiPSAL 2025 workshop paper (ACL Anthology ID 2025.chipsal-1.20).
% NOTE(review): the editor name 'Krishna Bal, Bal' is kept exactly as exported;
% the intended split is probably 'Bal, Bal Krishna' -- confirm against the proceedings.
@inproceedings{sehar-etal-2025-benchmarking,
  title     = {Benchmarking {Whisper} for Low-Resource Speech Recognition: An N-Shot Evaluation on {Pashto}, {Punjabi}, and {Urdu}},
  author    = {Sehar, Najm Ul and
               Khalid, Ayesha and
               Adeeba, Farah and
               Hussain, Sarmad},
  editor    = {Sarveswaran, Kengatharaiyer and
               Vaidya, Ashwini and
               Krishna Bal, Bal and
               Shams, Sana and
               Thapa, Surendrabikram},
  booktitle = {Proceedings of the First Workshop on Challenges in Processing South Asian Languages ({CHiPSAL} 2025)},
  month     = jan,
  year      = {2025},
  address   = {Abu Dhabi, UAE},
  publisher = {International Committee on Computational Linguistics},
  url       = {https://aclanthology.org/2025.chipsal-1.20/},
  pages     = {202--207},
  abstract  = {Whisper, a large-scale multilingual model, has demonstrated strong performance in speech recognition benchmarks, but its effectiveness on low-resource languages remains under-explored. This paper evaluates Whisper's performance on Pashto, Punjabi, and Urdu, three underrepresented languages. While Automatic Speech Recognition (ASR) has advanced for widely spoken languages, low-resource languages still face challenges due to limited data. Whisper's zero-shot performance was benchmarked and then its small variant was fine-tuned to improve transcription accuracy. Significant reductions in Word Error Rate (WER) were achieved through few-shot fine-tuning, which helped the model better handle challenges such as complex phonetic structures, compared to zero-shot performance. This study contributes to improving multilingual ASR for low-resource languages and highlights Whisper's adaptability and potential for further enhancement.},
}
Markdown (Informal)
[Benchmarking Whisper for Low-Resource Speech Recognition: An N-Shot Evaluation on Pashto, Punjabi, and Urdu](https://aclanthology.org/2025.chipsal-1.20/) (Sehar et al., CHiPSAL 2025)
ACL