@comment{SIGTYP 2025 workshop paper; ACL Anthology ID 2025.sigtyp-1.11.}
@inproceedings{liang-levow-2025-tone,
  title     = {Tone in Perspective: A Computational Typological Analysis of Tone Function in {ASR}},
  author    = {Liang, Siyu and
               Levow, Gina-Anne},
  editor    = {Hahn, Michael and
               Rani, Priya and
               Kumar, Ritesh and
               Shcherbakov, Andreas and
               Sorokin, Alexey and
               Serikov, Oleg and
               Cotterell, Ryan and
               Vylomova, Ekaterina},
  booktitle = {Proceedings of the 7th Workshop on Research in Computational Linguistic Typology and Multilingual {NLP}},
  month     = aug,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/landing_page/2025.sigtyp-1.11/},
  pages     = {82--92},
  isbn      = {979-8-89176-281-7},
  abstract  = {This study investigates the impact of pitch flattening on automatic speech recognition (ASR) performance across tonal and non-tonal languages. Using vocoder-based signal processing techniques, we created pitch-flattened versions of speech recordings and compared ASR performance against original recordings. Results reveal that tonal languages experience substantially larger performance degradation than non-tonal languages. Analysis of tone confusion matrices shows systematic patterns of misidentification where contour tones collapse toward level tones when pitch information is removed. Calculation of tone{'}s functional load at syllable and word levels demonstrates that syllable-level functional load strongly predicts ASR vulnerability to pitch flattening, while word-level patterns reflect each language{'}s morphological structure. These findings illuminate the differential importance of pitch information across languages and suggest that ASR systems for languages with high syllable-level functional load require more robust pitch modeling.},
}
Markdown (Informal)
[Tone in Perspective: A Computational Typological Analysis of Tone Function in ASR](https://preview.aclanthology.org/landing_page/2025.sigtyp-1.11/) (Liang & Levow, SIGTYP 2025)
ACL