@inproceedings{lin-etal-2024-continual,
    title     = {Continual Test-time Adaptation for End-to-end Speech Recognition on Noisy Speech},
    author    = {Lin, Guan-Ting and
                 Huang, Wei Ping and
                 Lee, Hung-yi},
    editor    = {Al-Onaizan, Yaser and
                 Bansal, Mohit and
                 Chen, Yun-Nung},
    booktitle = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing},
    month     = nov,
    year      = {2024},
    address   = {Miami, Florida, USA},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2024.emnlp-main.1116/},
    doi       = {10.18653/v1/2024.emnlp-main.1116},
    pages     = {20003--20015},
    abstract  = {Deep Learning-based end-to-end Automatic Speech Recognition (ASR) has made significant strides but still struggles with performance on out-of-domain samples due to domain shifts in real-world scenarios. Test-Time Adaptation (TTA) methods address this issue by adapting models using test samples at inference time. However, current ASR TTA methods have largely focused on non-continual TTA, which limits cross-sample knowledge learning compared to continual TTA. In this work, we first propose a Fast-slow TTA framework for ASR that leverages the advantage of continual and non-continual TTA. Following this framework, we introduce Dynamic SUTA (DSUTA), an entropy-minimization-based continual TTA method for ASR. To enhance DSUTA's robustness for time-varying data, we design a dynamic reset strategy to automatically detect domain shifts and reset the model, making it more effective at handling multi-domain data. Our method demonstrates superior performance on various noisy ASR datasets, outperforming both non-continual and continual TTA baselines while maintaining robustness to domain changes without requiring domain boundary information.},
}
Markdown (Informal)
[Continual Test-time Adaptation for End-to-end Speech Recognition on Noisy Speech](https://aclanthology.org/2024.emnlp-main.1116/) (Lin et al., EMNLP 2024)
ACL