@inproceedings{lee-etal-2020-massively,
title = "Massively Multilingual Pronunciation Modeling with {W}iki{P}ron",
author = "Lee, Jackson L. and
Ashby, Lucas F.E. and
Garza, M. Elizabeth and
Lee-Sikka, Yeonju and
Miller, Sean and
Wong, Alan and
McCarthy, Arya D. and
Gorman, Kyle",
booktitle = "Proceedings of the 12th Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.521",
pages = "4223--4228",
abstract = "We introduce WikiPron, an open-source command-line tool for extracting pronunciation data from Wiktionary, a collaborative multilingual online dictionary. We first describe the design and use of WikiPron. We then discuss the challenges faced scaling this tool to create an automatically-generated database of 1.7 million pronunciations from 165 languages. Finally, we validate the pronunciation database by using it to train and evaluate a collection of generic grapheme-to-phoneme models. The software, pronunciation data, and models are all made available under permissive open-source licenses.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lee-etal-2020-massively">
<titleInfo>
<title>Massively Multilingual Pronunciation Modeling with WikiPron</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jackson</namePart>
<namePart type="given">L</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucas</namePart>
<namePart type="given">F.E.</namePart>
<namePart type="family">Ashby</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">M</namePart>
<namePart type="given">Elizabeth</namePart>
<namePart type="family">Garza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yeonju</namePart>
<namePart type="family">Lee-Sikka</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sean</namePart>
<namePart type="family">Miller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="family">Wong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arya</namePart>
<namePart type="given">D</namePart>
<namePart type="family">McCarthy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kyle</namePart>
<namePart type="family">Gorman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 12th Language Resources and Evaluation Conference</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>We introduce WikiPron, an open-source command-line tool for extracting pronunciation data from Wiktionary, a collaborative multilingual online dictionary. We first describe the design and use of WikiPron. We then discuss the challenges faced scaling this tool to create an automatically-generated database of 1.7 million pronunciations from 165 languages. Finally, we validate the pronunciation database by using it to train and evaluate a collection of generic grapheme-to-phoneme models. The software, pronunciation data, and models are all made available under permissive open-source licenses.</abstract>
<identifier type="citekey">lee-etal-2020-massively</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.521</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>4223</start>
<end>4228</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Massively Multilingual Pronunciation Modeling with WikiPron
%A Lee, Jackson L.
%A Ashby, Lucas F.E.
%A Garza, M. Elizabeth
%A Lee-Sikka, Yeonju
%A Miller, Sean
%A Wong, Alan
%A McCarthy, Arya D.
%A Gorman, Kyle
%S Proceedings of the 12th Language Resources and Evaluation Conference
%D 2020
%8 may
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F lee-etal-2020-massively
%X We introduce WikiPron, an open-source command-line tool for extracting pronunciation data from Wiktionary, a collaborative multilingual online dictionary. We first describe the design and use of WikiPron. We then discuss the challenges faced scaling this tool to create an automatically-generated database of 1.7 million pronunciations from 165 languages. Finally, we validate the pronunciation database by using it to train and evaluate a collection of generic grapheme-to-phoneme models. The software, pronunciation data, and models are all made available under permissive open-source licenses.
%U https://aclanthology.org/2020.lrec-1.521
%P 4223-4228
Markdown (Informal)
[Massively Multilingual Pronunciation Modeling with WikiPron](https://aclanthology.org/2020.lrec-1.521) (Lee et al., LREC 2020)
ACL
- Jackson L. Lee, Lucas F.E. Ashby, M. Elizabeth Garza, Yeonju Lee-Sikka, Sean Miller, Alan Wong, Arya D. McCarthy, and Kyle Gorman. 2020. Massively Multilingual Pronunciation Modeling with WikiPron. In Proceedings of the 12th Language Resources and Evaluation Conference, pages 4223–4228, Marseille, France. European Language Resources Association.