% Conference paper (ROCLING 2025), exported from the ACL Anthology.
% The url field uses the canonical Anthology address built from the
% Anthology ID 2025.rocling-main.49 rather than the temporary ingest preview.
% NOTE(review): address appears to hold the conference venue, not a
% publisher city -- kept as exported; confirm against the target style.
@inproceedings{cheng-wu-2025-speech,
  title     = {Speech Recognition for Low-resource Languages: A Comparative Study on {Hakka} {Han} Characters and {Romanization}},
  author    = {Cheng, Yu-Hsiang and
               Wu, Yi-Syuan},
  editor    = {Chang, Kai-Wei and
               Lu, Ke-Han and
               Yang, Chih-Kai and
               Tam, Zhi-Rui and
               Chang, Wen-Yu and
               Wang, Chung-Che},
  booktitle = {Proceedings of the 37th Conference on Computational Linguistics and Speech Processing ({ROCLING} 2025)},
  month     = nov,
  year      = {2025},
  address   = {National Taiwan University, Taipei City, Taiwan},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.rocling-main.49/},
  pages     = {435--440},
  isbn      = {979-8-89176-379-1},
  abstract  = {This study focuses on speech recognition for low-resource languages, with Hakka as the case study. Since there is currently a lack of dedicated speech models for Taiwanese Southern Min, Hakka, and indigenous languages, we adopt OpenAI Whisper-Medium as the base model and apply Low-Rank Adaptation (LoRA) for fine-tuning. Two models with different output forms were developed: a Hakka character-based model and a Hakka phonetic-based model. The experimental dataset contains approximately 80 hours of speech, covering the Dapu and Zhao{'}an dialects, and the models were evaluated using Character Error Rate (CER) and Word Error Rate (WER).},
}
Markdown (Informal)
[Speech Recognition for Low-resource Languages: A Comparative Study on Hakka Han Characters and Romanization](https://aclanthology.org/2025.rocling-main.49/) (Cheng & Wu, ROCLING 2025)
ACL