% Findings of ACL 2025 paper: fine-tuning Whisper to classify prosodic stress.
% The url field uses the canonical ACL Anthology address (not a preview branch);
% braces in title/booktitle protect names from style-driven lowercasing.
@inproceedings{sohn-etal-2025-harnessing,
  title     = {Harnessing {Whisper} for Prosodic Stress Analysis},
  author    = {Sohn, Samuel S. and
               Knutsen, Sten and
               Stromswold, Karin},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Findings of the Association for Computational Linguistics: {ACL} 2025},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-acl.1331/},
  doi       = {10.18653/v1/2025.findings-acl.1331},
  pages     = {25931--25942},
  isbn      = {979-8-89176-256-5},
  abstract  = {Prosody affects how people produce and understand language, yet studies of how it does so have been hindered by the lack of efficient tools for analyzing prosodic stress. We fine-tune OpenAI Whisper large-v2, a state-of-the-art speech recognition model, to recognize phrasal, lexical, and contrastive stress using a small, carefully annotated dataset. Our results show that Whisper can learn distinct, gender-specific stress patterns to achieve near-human and super-human accuracy in stress classification and transfer its learning from one type of stress to another, surpassing traditional machine learning models. Furthermore, we explore how acoustic context influences its performance and propose a novel black-box evaluation method for characterizing the decision boundaries used by Whisper for prosodic stress interpretation. These findings open new avenues for large-scale, automated prosody research. Models can be found at github.com/SSSohn/ProsodyBench.},
}
Markdown (Informal)
[Harnessing Whisper for Prosodic Stress Analysis](https://aclanthology.org/2025.findings-acl.1331/) (Sohn et al., Findings 2025)
ACL
- Samuel S. Sohn, Sten Knutsen, and Karin Stromswold. 2025. Harnessing Whisper for Prosodic Stress Analysis. In Findings of the Association for Computational Linguistics: ACL 2025, pages 25931–25942, Vienna, Austria. Association for Computational Linguistics.