@inproceedings{srivastava-etal-2020-indicspeech,
title = "{I}ndic{S}peech: Text-to-Speech Corpus for {I}ndian Languages",
author = "Srivastava, Nimisha and
Mukhopadhyay, Rudrabha and
K R, Prajwal and
Jawahar, C V",
booktitle = "Proceedings of the 12th Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.789",
pages = "6417--6422",
abstract = "India is a country where several tens of languages are spoken by over a billion strong population. Text-to-speech systems for such languages will thus be extremely beneficial for wide-spread content creation and accessibility. Despite this, the current TTS systems for even the most popular Indian languages fall short of the contemporary state-of-the-art systems for English, Chinese, etc. We believe that one of the major reasons for this is the lack of large, publicly available text-to-speech corpora in these languages that are suitable for training neural text-to-speech systems. To mitigate this, we release a 24 hour text-to-speech corpus for 3 major Indian languages namely Hindi, Malayalam and Bengali. In this work, we also train a state-of-the-art TTS system for each of these languages and report their performances. The collected corpus, code, and trained models are made publicly available.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="srivastava-etal-2020-indicspeech">
<titleInfo>
<title>IndicSpeech: Text-to-Speech Corpus for Indian Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nimisha</namePart>
<namePart type="family">Srivastava</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rudrabha</namePart>
<namePart type="family">Mukhopadhyay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Prajwal</namePart>
<namePart type="family">K R</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">C</namePart>
<namePart type="given">V</namePart>
<namePart type="family">Jawahar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-may</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 12th Language Resources and Evaluation Conference</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>India is a country where several tens of languages are spoken by over a billion strong population. Text-to-speech systems for such languages will thus be extremely beneficial for wide-spread content creation and accessibility. Despite this, the current TTS systems for even the most popular Indian languages fall short of the contemporary state-of-the-art systems for English, Chinese, etc. We believe that one of the major reasons for this is the lack of large, publicly available text-to-speech corpora in these languages that are suitable for training neural text-to-speech systems. To mitigate this, we release a 24 hour text-to-speech corpus for 3 major Indian languages namely Hindi, Malayalam and Bengali. In this work, we also train a state-of-the-art TTS system for each of these languages and report their performances. The collected corpus, code, and trained models are made publicly available.</abstract>
<identifier type="citekey">srivastava-etal-2020-indicspeech</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.789</url>
</location>
<part>
<date>2020-may</date>
<extent unit="page">
<start>6417</start>
<end>6422</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T IndicSpeech: Text-to-Speech Corpus for Indian Languages
%A Srivastava, Nimisha
%A Mukhopadhyay, Rudrabha
%A K R, Prajwal
%A Jawahar, C. V.
%S Proceedings of the 12th Language Resources and Evaluation Conference
%D 2020
%8 may
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F srivastava-etal-2020-indicspeech
%X India is a country where several tens of languages are spoken by over a billion strong population. Text-to-speech systems for such languages will thus be extremely beneficial for wide-spread content creation and accessibility. Despite this, the current TTS systems for even the most popular Indian languages fall short of the contemporary state-of-the-art systems for English, Chinese, etc. We believe that one of the major reasons for this is the lack of large, publicly available text-to-speech corpora in these languages that are suitable for training neural text-to-speech systems. To mitigate this, we release a 24 hour text-to-speech corpus for 3 major Indian languages namely Hindi, Malayalam and Bengali. In this work, we also train a state-of-the-art TTS system for each of these languages and report their performances. The collected corpus, code, and trained models are made publicly available.
%U https://aclanthology.org/2020.lrec-1.789
%P 6417-6422
Markdown (Informal)
[IndicSpeech: Text-to-Speech Corpus for Indian Languages](https://aclanthology.org/2020.lrec-1.789) (Srivastava et al., LREC 2020)
ACL
- Nimisha Srivastava, Rudrabha Mukhopadhyay, Prajwal K R, and C V Jawahar. 2020. IndicSpeech: Text-to-Speech Corpus for Indian Languages. In Proceedings of the 12th Language Resources and Evaluation Conference, pages 6417–6422, Marseille, France. European Language Resources Association.