@inproceedings{muller-etal-2016-towards,
title = "Towards Improving Low-Resource Speech Recognition Using Articulatory and Language Features",
author = {M{\"u}ller, Markus and
St{\"u}ker, Sebastian and
Waibel, Alex},
booktitle = "Proceedings of the 13th International Conference on Spoken Language Translation",
month = dec # " 8-9",
year = "2016",
address = "Seattle, Washington D.C",
publisher = "International Workshop on Spoken Language Translation",
url = "https://aclanthology.org/2016.iwslt-1.9",
abstract = "In an increasingly globalized world, there is a rising demand for speech recognition systems. Systems for languages like English, German or French do achieve a decent performance, but there exists a long tail of languages for which such systems do not yet exist. State-of-the-art speech recognition systems feature Deep Neural Networks (DNNs). Being a data driven method and therefore highly dependent on sufficient training data, the lack of resources directly affects the recognition performance. There exist multiple techniques to deal with such resource constraint conditions, one approach is the use of additional data from other languages. In the past, is was demonstrated that multilingually trained systems benefit from adding language feature vectors (LFVs) to the input features, similar to i-Vectors. In this work, we extend this approach by the addition of articulatory features (AFs). We show that AFs also benefit from LFVs and that multilingual system setups benefit from adding both AFs and LFVs. Pretending English to be a low-resource language, we restricted ourselves to use only 10h of English acoustic training data. For system training, we use additional data from French, German and Turkish. By using a combination of AFs and LFVs, we were able to decrease the WER from 18.1{\%} to 17.3{\%} after system combination in our setup using a multilingual phone set.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="muller-etal-2016-towards">
<titleInfo>
<title>Towards Improving Low-Resource Speech Recognition Using Articulatory and Language Features</title>
</titleInfo>
<name type="personal">
<namePart type="given">Markus</namePart>
<namePart type="family">Müller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Stüker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alex</namePart>
<namePart type="family">Waibel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-dec" 8-9"</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 13th International Conference on Spoken Language Translation</title>
</titleInfo>
<originInfo>
<publisher>International Workshop on Spoken Language Translation</publisher>
<place>
<placeTerm type="text">Seattle, Washington D.C</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In an increasingly globalized world, there is a rising demand for speech recognition systems. Systems for languages like English, German or French do achieve a decent performance, but there exists a long tail of languages for which such systems do not yet exist. State-of-the-art speech recognition systems feature Deep Neural Networks (DNNs). Being a data driven method and therefore highly dependent on sufficient training data, the lack of resources directly affects the recognition performance. There exist multiple techniques to deal with such resource constraint conditions, one approach is the use of additional data from other languages. In the past, is was demonstrated that multilingually trained systems benefit from adding language feature vectors (LFVs) to the input features, similar to i-Vectors. In this work, we extend this approach by the addition of articulatory features (AFs). We show that AFs also benefit from LFVs and that multilingual system setups benefit from adding both AFs and LFVs. Pretending English to be a low-resource language, we restricted ourselves to use only 10h of English acoustic training data. For system training, we use additional data from French, German and Turkish. By using a combination of AFs and LFVs, we were able to decrease the WER from 18.1% to 17.3% after system combination in our setup using a multilingual phone set.</abstract>
<identifier type="citekey">muller-etal-2016-towards</identifier>
<location>
<url>https://aclanthology.org/2016.iwslt-1.9</url>
</location>
<part>
<date>2016-dec" 8-9"</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Towards Improving Low-Resource Speech Recognition Using Articulatory and Language Features
%A Müller, Markus
%A Stüker, Sebastian
%A Waibel, Alex
%S Proceedings of the 13th International Conference on Spoken Language Translation
%D 2016
%8 dec" 8 9"
%I International Workshop on Spoken Language Translation
%C Seattle, Washington D.C
%F muller-etal-2016-towards
%X In an increasingly globalized world, there is a rising demand for speech recognition systems. Systems for languages like English, German or French do achieve a decent performance, but there exists a long tail of languages for which such systems do not yet exist. State-of-the-art speech recognition systems feature Deep Neural Networks (DNNs). Being a data driven method and therefore highly dependent on sufficient training data, the lack of resources directly affects the recognition performance. There exist multiple techniques to deal with such resource constraint conditions, one approach is the use of additional data from other languages. In the past, is was demonstrated that multilingually trained systems benefit from adding language feature vectors (LFVs) to the input features, similar to i-Vectors. In this work, we extend this approach by the addition of articulatory features (AFs). We show that AFs also benefit from LFVs and that multilingual system setups benefit from adding both AFs and LFVs. Pretending English to be a low-resource language, we restricted ourselves to use only 10h of English acoustic training data. For system training, we use additional data from French, German and Turkish. By using a combination of AFs and LFVs, we were able to decrease the WER from 18.1% to 17.3% after system combination in our setup using a multilingual phone set.
%U https://aclanthology.org/2016.iwslt-1.9
Markdown (Informal)
[Towards Improving Low-Resource Speech Recognition Using Articulatory and Language Features](https://aclanthology.org/2016.iwslt-1.9) (Müller et al., IWSLT 2016)
ACL