@comment{
  Bahar et al., IWSLT 2019. The url field uses the canonical ACL Anthology
  address (not a preview/staging branch). The month field keeps the
  Anthology convention of appending the conference days to the month macro.
}
@inproceedings{bahar-etal-2019-using,
  title     = {On Using {SpecAugment} for End-to-End Speech Translation},
  author    = {Bahar, Parnia and
               Zeyer, Albert and
               Schl{\"u}ter, Ralf and
               Ney, Hermann},
  editor    = {Niehues, Jan and
               Cattoni, Rolando and
               St{\"u}ker, Sebastian and
               Negri, Matteo and
               Turchi, Marco and
               Ha, Thanh-Le and
               Salesky, Elizabeth and
               Sanabria, Ramon and
               Barrault, Loic and
               Specia, Lucia and
               Federico, Marcello},
  booktitle = {Proceedings of the 16th International Conference on Spoken Language Translation},
  month     = nov # " 2--3",
  year      = 2019,
  address   = {Hong Kong},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2019.iwslt-1.22/},
  abstract  = {This work investigates a simple data augmentation technique, SpecAugment, for end-to-end speech translation. SpecAugment is a low-cost implementation method applied directly to the audio input features and it consists of masking blocks of frequency channels, and/or time steps. We apply SpecAugment on end-to-end speech translation tasks and achieve up to +2.2{\%} BLEU on LibriSpeech Audiobooks En{\textrightarrow}Fr and +1.2{\%} on IWSLT TED-talks En{\textrightarrow}De by alleviating overfitting to some extent. We also examine the effectiveness of the method in a variety of data scenarios and show that the method also leads to significant improvements in various data conditions irrespective of the amount of training data.},
}
Markdown (Informal)
[On Using SpecAugment for End-to-End Speech Translation](https://aclanthology.org/2019.iwslt-1.22/) (Bahar et al., IWSLT 2019)
ACL