% Conference paper in the Findings of ACL 2025 (ACL Anthology ID 2025.findings-acl.1187).
% The url field uses the canonical aclanthology.org landing page rather than a
% preview.aclanthology.org staging build, which is not a stable address.
@inproceedings{waheed-etal-2025-robust,
  author    = {Waheed, Abdul and
               Atwany, Hanin and
               Singh, Rita and
               Raj, Bhiksha},
  title     = {On the Robust Approximation of {ASR} Metrics},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Findings of the Association for Computational Linguistics: {ACL} 2025},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  pages     = {23119--23146},
  isbn      = {979-8-89176-256-5},
  url       = {https://aclanthology.org/2025.findings-acl.1187/},
  abstract  = {Recent advances in speech foundation models are largely driven by scaling both model size and data, enabling them to perform a wide range of tasks, including speech recognition. Traditionally, ASR models are evaluated using metrics like Word Error Rate (WER) and Character Error Rate (CER), which depend on ground truth labels. As a result of limited labeled data from diverse domains and testing conditions, the true generalization capabilities of these models beyond standard benchmarks remain unclear. Moreover, labeling data is both costly and time-consuming. To address this, we propose a novel label-free approach for approximating ASR performance metrics, eliminating the need for ground truth labels. Our method utilizes multimodal embeddings in a unified space for speech and transcription representations, combined with a high-quality proxy model to compute proxy metrics. These features are used to train a regression model to predict key ASR metrics like Word Error Rate (WER) and Character Error Rate (CER). We experiment with over 40 models across 14 datasets representing both standard and in-the-wild testing conditions. Our results show that we approximate the metrics within a single-digit absolute difference across all experimental configurations, outperforming the most recent baseline by more than 50{\%}.},
}
Markdown (Informal)
[On the Robust Approximation of ASR Metrics](https://aclanthology.org/2025.findings-acl.1187/) (Waheed et al., Findings 2025)
ACL
- Abdul Waheed, Hanin Atwany, Rita Singh, and Bhiksha Raj. 2025. On the Robust Approximation of ASR Metrics. In Findings of the Association for Computational Linguistics: ACL 2025, pages 23119–23146, Vienna, Austria. Association for Computational Linguistics.