@inproceedings{kunze-etal-2017-transfer,
    title = "Transfer Learning for Speech Recognition on a Budget",
    author = "Kunze, Julius and
      Kirsch, Louis and
      Kurenkov, Ilia and
      Krug, Andreas and
      Johannsmeier, Jens and
      Stober, Sebastian",
    booktitle = "Proceedings of the 2nd Workshop on Representation Learning for {NLP}",
    month = aug,
    year = "2017",
    address = "Vancouver, Canada",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/W17-2620",
    doi = "10.18653/v1/W17-2620",
    pages = "168--177",
    abstract = "End-to-end training of automated speech recognition (ASR) systems requires massive data and compute resources. We explore transfer learning based on model adaptation as an approach for training ASR models under constrained GPU memory, throughput and training data. We conduct several systematic experiments adapting a Wav2Letter convolutional neural network originally trained for English ASR to the German language. We show that this technique allows faster training on consumer-grade resources while requiring less training data in order to achieve the same accuracy, thereby lowering the cost of training ASR models in other languages. Model introspection revealed that small adaptations to the network{'}s weights were sufficient for good performance, especially for inner layers.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kunze-etal-2017-transfer">
    <titleInfo>
        <title>Transfer Learning for Speech Recognition on a Budget</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Julius</namePart>
        <namePart type="family">Kunze</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Louis</namePart>
        <namePart type="family">Kirsch</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Ilia</namePart>
        <namePart type="family">Kurenkov</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Andreas</namePart>
        <namePart type="family">Krug</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Jens</namePart>
        <namePart type="family">Johannsmeier</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Sebastian</namePart>
        <namePart type="family">Stober</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2017-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the 2nd Workshop on Representation Learning for NLP</title>
        </titleInfo>
        <originInfo>
            <publisher>Association for Computational Linguistics</publisher>
            <place>
                <placeTerm type="text">Vancouver, Canada</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>End-to-end training of automated speech recognition (ASR) systems requires massive data and compute resources. We explore transfer learning based on model adaptation as an approach for training ASR models under constrained GPU memory, throughput and training data. We conduct several systematic experiments adapting a Wav2Letter convolutional neural network originally trained for English ASR to the German language. We show that this technique allows faster training on consumer-grade resources while requiring less training data in order to achieve the same accuracy, thereby lowering the cost of training ASR models in other languages. Model introspection revealed that small adaptations to the network’s weights were sufficient for good performance, especially for inner layers.</abstract>
    <identifier type="citekey">kunze-etal-2017-transfer</identifier>
    <identifier type="doi">10.18653/v1/W17-2620</identifier>
    <location>
        <url>https://aclanthology.org/W17-2620</url>
    </location>
    <part>
        <date>2017-08</date>
        <extent unit="page">
            <start>168</start>
            <end>177</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Transfer Learning for Speech Recognition on a Budget
%A Kunze, Julius
%A Kirsch, Louis
%A Kurenkov, Ilia
%A Krug, Andreas
%A Johannsmeier, Jens
%A Stober, Sebastian
%S Proceedings of the 2nd Workshop on Representation Learning for NLP
%D 2017
%8 aug
%I Association for Computational Linguistics
%C Vancouver, Canada
%F kunze-etal-2017-transfer
%X End-to-end training of automated speech recognition (ASR) systems requires massive data and compute resources. We explore transfer learning based on model adaptation as an approach for training ASR models under constrained GPU memory, throughput and training data. We conduct several systematic experiments adapting a Wav2Letter convolutional neural network originally trained for English ASR to the German language. We show that this technique allows faster training on consumer-grade resources while requiring less training data in order to achieve the same accuracy, thereby lowering the cost of training ASR models in other languages. Model introspection revealed that small adaptations to the network’s weights were sufficient for good performance, especially for inner layers.
%R 10.18653/v1/W17-2620
%U https://aclanthology.org/W17-2620
%U https://doi.org/10.18653/v1/W17-2620
%P 168-177
Markdown (Informal)
[Transfer Learning for Speech Recognition on a Budget](https://aclanthology.org/W17-2620) (Kunze et al., 2017)
ACL
Julius Kunze, Louis Kirsch, Ilia Kurenkov, Andreas Krug, Jens Johannsmeier, and Sebastian Stober. 2017. Transfer Learning for Speech Recognition on a Budget. In Proceedings of the 2nd Workshop on Representation Learning for NLP, pages 168–177, Vancouver, Canada. Association for Computational Linguistics.
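
The abstract describes adapting a Wav2Letter-style convolutional network trained for English ASR to German, with only small changes to the weights of the inner layers. As a rough illustration of that kind of model adaptation, here is a minimal PyTorch sketch. The layer sizes, the checkpoint path, the freezing policy, and the alphabets are illustrative assumptions, not the paper's actual configuration or code.

```python
# Minimal sketch of cross-lingual model adaptation for a Wav2Letter-style
# 1D-convolutional ASR model: swap the output layer for the German alphabet,
# optionally freeze the inner layers, and fine-tune with CTC loss.
import torch
import torch.nn as nn

EN_ALPHABET = "abcdefghijklmnopqrstuvwxyz '"      # 28 symbols, blank appended
DE_ALPHABET = "abcdefghijklmnopqrstuvwxyzäöüß '"  # German adds ä, ö, ü, ß

def wav2letter(num_classes: int, in_features: int = 40) -> nn.Sequential:
    """Wav2Letter-like stack of 1D convolutions over spectrogram frames.
    The real model is deeper; this is a scaled-down stand-in."""
    return nn.Sequential(
        nn.Conv1d(in_features, 250, kernel_size=48, stride=2, padding=23),
        nn.ReLU(),
        nn.Conv1d(250, 250, kernel_size=7, padding=3),
        nn.ReLU(),
        nn.Conv1d(250, 2000, kernel_size=32, padding=16),
        nn.ReLU(),
        nn.Conv1d(2000, 2000, kernel_size=1),
        nn.ReLU(),
        nn.Conv1d(2000, num_classes + 1, kernel_size=1),  # +1 for CTC blank
    )

# 1. Build the English model and load pretrained weights (path is hypothetical).
model = wav2letter(num_classes=len(EN_ALPHABET))
# model.load_state_dict(torch.load("wav2letter_english.pt"))

# 2. Replace the output layer to cover the larger German character set.
model[-1] = nn.Conv1d(2000, len(DE_ALPHABET) + 1, kernel_size=1)

# 3. Optionally freeze the inner layers; the paper's introspection suggests
#    they need only small adaptation, which saves memory and compute.
for layer in list(model)[:-1]:
    for p in layer.parameters():
        p.requires_grad = False

# 4. Fine-tune on German data with CTC loss (dummy batch shown here).
optimizer = torch.optim.Adam(
    [p for p in model.parameters() if p.requires_grad], lr=1e-4)
ctc = nn.CTCLoss(blank=len(DE_ALPHABET))  # blank is the last output channel

features = torch.randn(8, 40, 200)  # (batch, mel bins, frames)
log_probs = model(features).log_softmax(dim=1).permute(2, 0, 1)  # (T, N, C)
targets = torch.randint(0, len(DE_ALPHABET), (8, 30))  # label indices, no blank
loss = ctc(log_probs, targets,
           input_lengths=torch.full((8,), log_probs.size(0), dtype=torch.long),
           target_lengths=torch.full((8,), 30, dtype=torch.long))
loss.backward()
optimizer.step()
```

Freezing everything but the new output layer is the cheapest variant of the adaptation the abstract hints at; unfreezing more layers trades compute for accuracy.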