@inproceedings{yang-etal-2022-self,
    title = "Self-supervised Rewiring of Pre-trained Speech Encoders: Towards Faster Fine-tuning with Less Labels in Speech Processing",
author = "Yang, Hao and
Zhao, Jinming and
Haffari, Gholamreza and
Shareghi, Ehsan",
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2022",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/add-emnlp-2024-awards/2022.findings-emnlp.141/",
doi = "10.18653/v1/2022.findings-emnlp.141",
pages = "1952--1959",
    abstract = "Pre-trained speech Transformers have facilitated great success across various speech processing tasks. However, fine-tuning these encoders for downstream tasks requires sufficiently large training data to converge or to achieve state-of-the-art performance. In the text domain this has been partly attributed to the sub-optimality of the representation space in pre-trained Transformers. In this work, we take a sober look into pre-trained speech encoders and rewire their representation space without requiring any task-specific labels. Our method utilises a neutrally synthesised version of the audio inputs along with frame masking to construct positive pairs for contrastive self-supervised learning. When used for augmenting the wav2vec 2 encoder, we observe a consistent improvement of isotropy in the representation space. Our experiments on 6 speech processing tasks exhibit a significant convergence speedup during task fine-tuning as well as consistent task improvement, especially in low-resource settings."
}
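The abstract describes building positive pairs from two views of the same utterance (a neutrally synthesised version and a frame-masked version) and training with a contrastive self-supervised objective. The sketch below is not the authors' code; it is a minimal, hypothetical illustration of a standard InfoNCE-style contrastive loss over paired encoder outputs, assuming pooled 768-dimensional representations (wav2vec 2-sized) and random tensors standing in for the two views.

```python
# Hypothetical sketch of a contrastive objective over positive pairs built from
# two views of the same utterance. Shapes, names, and the pooling assumption
# are illustrative, not the paper's implementation.
import torch
import torch.nn.functional as F


def info_nce_loss(z_a: torch.Tensor, z_b: torch.Tensor, temperature: float = 0.1) -> torch.Tensor:
    """InfoNCE over paired representations.

    z_a, z_b: (N, D) embeddings where row i of z_a and row i of z_b form a
    positive pair (e.g., a frame-masked view and a neutrally synthesised view
    of the same utterance); all other rows act as in-batch negatives.
    """
    z_a = F.normalize(z_a, dim=-1)
    z_b = F.normalize(z_b, dim=-1)
    logits = z_a @ z_b.t() / temperature              # (N, N) scaled cosine similarities
    targets = torch.arange(z_a.size(0), device=z_a.device)
    # Symmetric cross-entropy: each view must identify its paired partner.
    return 0.5 * (F.cross_entropy(logits, targets) +
                  F.cross_entropy(logits.t(), targets))


if __name__ == "__main__":
    # Stand-ins for pooled encoder outputs of two augmented views of a batch
    # of 8 utterances with 768-dimensional representations.
    view_masked = torch.randn(8, 768)   # e.g., frame-masked input
    view_synth = torch.randn(8, 768)    # e.g., neutrally synthesised input
    loss = info_nce_loss(view_masked, view_synth)
    print(f"contrastive loss: {loss.item():.4f}")
```

In practice such a loss would be applied to the outputs of the pre-trained encoder being rewired, with the temperature and batch size treated as tuning knobs.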
Markdown (Informal)
[Self-supervised Rewiring of Pre-trained Speech Encoders: Towards Faster Fine-tuning with Less Labels in Speech Processing](https://preview.aclanthology.org/add-emnlp-2024-awards/2022.findings-emnlp.141/) (Yang et al., Findings 2022)