% Findings of NAACL 2025 paper on token-level kNN search applied to Whisper.
% Case-sensitive words in the title are brace-protected so sentence-casing
% styles keep them intact. The url field uses the permanent ACL Anthology
% address instead of a staging preview link. No DOI is recorded here; add one
% once it has been confirmed against the Anthology record.
@inproceedings{nachesa-niculae-2025-knn,
  title     = {{kNN} For {Whisper} And Its Effect On Bias And Speaker Adaptation},
  author    = {Nachesa, Maya K. and
               Niculae, Vlad},
  editor    = {Chiruzzo, Luis and
               Ritter, Alan and
               Wang, Lu},
  booktitle = {Findings of the Association for Computational Linguistics: NAACL 2025},
  month     = apr,
  year      = {2025},
  address   = {Albuquerque, New Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-naacl.369/},
  pages     = {6621--6627},
  isbn      = {979-8-89176-195-7},
  abstract  = {Speech recognition performance varies by language, domain, and speaker characteristics such as accent, but fine-tuning a model on any of these categories may lead to catastrophic forgetting. Token-level $k$ nearest neighbor search ($k$NN), first proposed for neural sequence decoders for natural language generation (NLG) and machine translation (MT), is a non-parametric method that instead adapts using inference-time search in an external datastore, without training the underlying model. We show that Whisper, a transformer end-to-end speech model, benefits from $k$NN. We investigate the differences between the speech and text setups. We discuss implications for speaker adaptation, and analyze improvements by gender, accent, and age.},
}
Markdown (Informal)
[kNN For Whisper And Its Effect On Bias And Speaker Adaptation](https://aclanthology.org/2025.findings-naacl.369/) (Nachesa & Niculae, Findings 2025)
ACL