@inproceedings{peters-etal-2017-massively,
title = "Massively Multilingual Neural Grapheme-to-Phoneme Conversion",
author = "Peters, Ben and
Dehdari, Jon and
van Genabith, Josef",
editor = "Bender, Emily and
Daum{\'e} III, Hal and
Ettinger, Allyson and
Rao, Sudha",
booktitle = "Proceedings of the First Workshop on Building Linguistically Generalizable {NLP} Systems",
month = sep,
year = "2017",
address = "Copenhagen, Denmark",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/W17-5403/",
doi = "10.18653/v1/W17-5403",
pages = "19--26",
abstract = "Grapheme-to-phoneme conversion (g2p) is necessary for text-to-speech and automatic speech recognition systems. Most g2p systems are monolingual: they require language-specific data or handcrafting of rules. Such systems are difficult to extend to low resource languages, for which data and handcrafted rules are not available. As an alternative, we present a neural sequence-to-sequence approach to g2p which is trained on spelling{--}pronunciation pairs in hundreds of languages. The system shares a single encoder and decoder across all languages, allowing it to utilize the intrinsic similarities between different writing systems. We show an 11{\%} improvement in phoneme error rate over an approach based on adapting high-resource monolingual g2p models to low-resource languages. Our model is also much more compact relative to previous approaches."
}