@inproceedings{ahmed-etal-2020-preparation,
title = "Preparation of {B}angla Speech Corpus from Publicly Available Audio {\&} Text",
author = "Ahmed, Shafayat and
Sadeq, Nafis and
Shubha, Sudipta Saha and
Islam, Md. Nahidul and
Adnan, Muhammad Abdullah and
Islam, Mohammad Zuberul",
booktitle = "Proceedings of the 12th Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.811",
pages = "6586--6592",
abstract = "Automatic speech recognition systems require large annotated speech corpus. The manual annotation of a large corpus is very difficult. In this paper, we focus on the automatic preparation of a speech corpus for Bangladeshi Bangla. We have used publicly available Bangla audiobooks and TV news recordings as audio sources. We designed and implemented an iterative algorithm that takes as input a speech corpus and a huge amount of raw audio (without transcription) and outputs a much larger speech corpus with reasonable confidence. We have leveraged speaker diarization, gender detection, etc. to prepare the annotated corpus. We also have prepared a synthetic speech corpus for handling out-of-vocabulary word problems in Bangla language. Our corpus is suitable for training with Kaldi. Experimental results show that the use of our corpus in addition to the Google Speech corpus (229 hours) significantly improves the performance of the ASR system.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ahmed-etal-2020-preparation">
<titleInfo>
<title>Preparation of Bangla Speech Corpus from Publicly Available Audio & Text</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shafayat</namePart>
<namePart type="family">Ahmed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nafis</namePart>
<namePart type="family">Sadeq</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sudipta</namePart>
<namePart type="given">Saha</namePart>
<namePart type="family">Shubha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Md.</namePart>
<namePart type="given">Nahidul</namePart>
<namePart type="family">Islam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Muhammad</namePart>
<namePart type="given">Abdullah</namePart>
<namePart type="family">Adnan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Zuberul</namePart>
<namePart type="family">Islam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-may</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 12th Language Resources and Evaluation Conference</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>Automatic speech recognition systems require large annotated speech corpus. The manual annotation of a large corpus is very difficult. In this paper, we focus on the automatic preparation of a speech corpus for Bangladeshi Bangla. We have used publicly available Bangla audiobooks and TV news recordings as audio sources. We designed and implemented an iterative algorithm that takes as input a speech corpus and a huge amount of raw audio (without transcription) and outputs a much larger speech corpus with reasonable confidence. We have leveraged speaker diarization, gender detection, etc. to prepare the annotated corpus. We also have prepared a synthetic speech corpus for handling out-of-vocabulary word problems in Bangla language. Our corpus is suitable for training with Kaldi. Experimental results show that the use of our corpus in addition to the Google Speech corpus (229 hours) significantly improves the performance of the ASR system.</abstract>
<identifier type="citekey">ahmed-etal-2020-preparation</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.811</url>
</location>
<part>
<date>2020-may</date>
<extent unit="page">
<start>6586</start>
<end>6592</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Preparation of Bangla Speech Corpus from Publicly Available Audio & Text
%A Ahmed, Shafayat
%A Sadeq, Nafis
%A Shubha, Sudipta Saha
%A Islam, Md. Nahidul
%A Adnan, Muhammad Abdullah
%A Islam, Mohammad Zuberul
%S Proceedings of the 12th Language Resources and Evaluation Conference
%D 2020
%8 may
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F ahmed-etal-2020-preparation
%X Automatic speech recognition systems require large annotated speech corpus. The manual annotation of a large corpus is very difficult. In this paper, we focus on the automatic preparation of a speech corpus for Bangladeshi Bangla. We have used publicly available Bangla audiobooks and TV news recordings as audio sources. We designed and implemented an iterative algorithm that takes as input a speech corpus and a huge amount of raw audio (without transcription) and outputs a much larger speech corpus with reasonable confidence. We have leveraged speaker diarization, gender detection, etc. to prepare the annotated corpus. We also have prepared a synthetic speech corpus for handling out-of-vocabulary word problems in Bangla language. Our corpus is suitable for training with Kaldi. Experimental results show that the use of our corpus in addition to the Google Speech corpus (229 hours) significantly improves the performance of the ASR system.
%U https://aclanthology.org/2020.lrec-1.811
%P 6586-6592
Markdown (Informal)
[Preparation of Bangla Speech Corpus from Publicly Available Audio & Text](https://aclanthology.org/2020.lrec-1.811) (Ahmed et al., LREC 2020)
ACL