% LREC 2014 conference paper (ACL Anthology ID L14-1369).
@inproceedings{elmahdy-etal-2014-development,
  title     = {Development of a {TV} Broadcasts Speech Recognition System for {Qatari} {Arabic}},
  author    = {Elmahdy, Mohamed and
               Hasegawa-Johnson, Mark and
               Mustafawi, Eiman},
  editor    = {Calzolari, Nicoletta and
               Choukri, Khalid and
               Declerck, Thierry and
               Loftsson, Hrafn and
               Maegaard, Bente and
               Mariani, Joseph and
               Moreno, Asuncion and
               Odijk, Jan and
               Piperidis, Stelios},
  booktitle = {Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)},
  month     = may,
  year      = {2014},
  address   = {Reykjavik, Iceland},
  publisher = {European Language Resources Association (ELRA)},
  url       = {https://aclanthology.org/L14-1369/},
  pages     = {3057--3061},
  abstract  = {A major problem with dialectal Arabic speech recognition is due to the sparsity of speech resources. In this paper, a transfer learning framework is proposed to jointly use a large amount of Modern Standard Arabic (MSA) data and little amount of dialectal Arabic data to improve acoustic and language modeling. The Qatari Arabic (QA) dialect has been chosen as a typical example for an under-resourced Arabic dialect. A wide-band speech corpus has been collected and transcribed from several Qatari TV series and talk-show programs. A large vocabulary speech recognition baseline system was built using the QA corpus. The proposed MSA-based transfer learning technique was performed by applying orthographic normalization, phone mapping, data pooling, acoustic model adaptation, and system combination. The proposed approach can achieve more than 28{\%} relative reduction in WER.},
}
Markdown (Informal)
[Development of a TV Broadcasts Speech Recognition System for Qatari Arabic](https://aclanthology.org/L14-1369/) (Elmahdy et al., LREC 2014)
ACL