% Workshop paper on automatic speech recognition for Hupa (ACL Anthology ID 2022.computel-1.23).
@inproceedings{liu-etal-2022-enhancing,
  title     = {Enhancing Documentation of {Hupa} with Automatic Speech Recognition},
  author    = {Liu, Zoey and
               Spence, Justin and
               Prud{'}hommeaux, Emily},
  editor    = {Moeller, Sarah and
               Anastasopoulos, Antonios and
               Arppe, Antti and
               Chaudhary, Aditi and
               Harrigan, Atticus and
               Holden, Josh and
               Lachler, Jordan and
               Palmer, Alexis and
               Rijhwani, Shruti and
               Schwartz, Lane},
  booktitle = {Proceedings of the Fifth Workshop on the Use of Computational Methods in the Study of Endangered Languages},
  month     = may,
  year      = {2022},
  address   = {Dublin, Ireland},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.computel-1.23/},
  doi       = {10.18653/v1/2022.computel-1.23},
  pages     = {187--192},
  abstract  = {This study investigates applications of automatic speech recognition (ASR) techniques to Hupa, a critically endangered Native American language from the Dene (Athabaskan) language family. Using around 9h12m of spoken data produced by one elder who is a first-language Hupa speaker, we experimented with different evaluation schemes and training settings. On average a fully connected deep neural network reached a word error rate of 35.26{\%}. Our overall results illustrate the utility of ASR for making Hupa language documentation more accessible and usable. In addition, we found that when training acoustic models, using recordings with transcripts that were not carefully verified did not necessarily have a negative effect on model performance. This shows promise for speech corpora of indigenous languages that commonly include transcriptions produced by second-language speakers or linguists who have advanced knowledge in the language of interest.},
}
Markdown (Informal)
[Enhancing Documentation of Hupa with Automatic Speech Recognition](https://aclanthology.org/2022.computel-1.23/) (Liu et al., ComputEL 2022)
ACL