@comment{
  Wu and Yarowsky, BUCC 2021 workshop paper (ACL Anthology ID 2021.bucc-1.9).
  The url field uses the canonical aclanthology.org address instead of a
  temporary preview-branch deployment, so the link stays valid.
  Proper nouns and acronyms are brace-protected as whole words so that
  sentence-casing styles keep their capitalisation.
}
@inproceedings{wu-yarowsky-2021-pronunciations,
  author    = {Wu, Winston and Yarowsky, David},
  title     = {On Pronunciations in {Wiktionary}: Extraction and Experiments on Multilingual Syllabification and Stress Prediction},
  editor    = {Rapp, Reinhard and Sharoff, Serge and Zweigenbaum, Pierre},
  booktitle = {Proceedings of the 14th Workshop on Building and Using Comparable Corpora ({BUCC} 2021)},
  month     = sep,
  year      = {2021},
  address   = {Online (Virtual Mode)},
  publisher = {INCOMA Ltd.},
  pages     = {68--74},
  url       = {https://aclanthology.org/2021.bucc-1.9/},
  abstract  = {We constructed parsers for five non-English editions of Wiktionary, which combined with pronunciations from the English edition, comprises over 5.3 million IPA pronunciations, the largest pronunciation lexicon of its kind. This dataset is a unique comparable corpus of IPA pronunciations annotated from multiple sources. We analyze the dataset, noting the presence of machine-generated pronunciations. We develop a novel visualization method to quantify syllabification. We experiment on the new combined task of multilingual IPA syllabification and stress prediction, finding that training a massively multilingual neural sequence-to-sequence model with copy attention can improve performance on both high- and low-resource languages, and multi-task training on stress prediction helps with syllabification.},
}
Markdown (Informal)
[On Pronunciations in Wiktionary: Extraction and Experiments on Multilingual Syllabification and Stress Prediction](https://aclanthology.org/2021.bucc-1.9/) (Wu & Yarowsky, BUCC 2021)
ACL