@comment{Zhao and Chodroff, LREC 2022: paper introducing the ManDi spoken corpus of Mandarin regional dialects. The url field holds the canonical ACL Anthology address for Anthology ID 2022.lrec-1.213 rather than a preview-site link. The abstract field is informational and is not printed by standard styles.}
@inproceedings{zhao-chodroff-2022-mandi,
  title     = {The {ManDi} Corpus: A Spoken Corpus of {Mandarin} Regional Dialects},
  author    = {Zhao, Liang and
               Chodroff, Eleanor},
  editor    = {Calzolari, Nicoletta and
               B{\'e}chet, Fr{\'e}d{\'e}ric and
               Blache, Philippe and
               Choukri, Khalid and
               Cieri, Christopher and
               Declerck, Thierry and
               Goggi, Sara and
               Isahara, Hitoshi and
               Maegaard, Bente and
               Mariani, Joseph and
               Mazo, H{\'e}l{\`e}ne and
               Odijk, Jan and
               Piperidis, Stelios},
  booktitle = {Proceedings of the Thirteenth Language Resources and Evaluation Conference},
  month     = jun,
  year      = {2022},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  url       = {https://aclanthology.org/2022.lrec-1.213/},
  pages     = {1985--1990},
  abstract  = {In the present paper, we introduce the ManDi Corpus, a spoken corpus of regional Mandarin dialects and Standard Mandarin. The corpus currently contains 357 recordings (about 9.6 hours) of monosyllabic words, disyllabic words, short sentences, a short passage and a poem, each produced in Standard Mandarin and in one of six regional Mandarin dialects: Beijing, Chengdu, Jinan, Taiyuan, Wuhan, and Xi'an Mandarin from 36 speakers. The corpus was collected remotely using participant-controlled smartphone recording apps. Word- and phone-level alignments were generated using Praat and the Montreal Forced Aligner. The pilot study of dialect-specific tone systems showed that with practicable design and decent recording quality, remotely collected speech data can be suitable for analysis of relative patterns in acoustic-phonetic realization. The corpus is available on OSF (\url{https://osf.io/fgv4w/}) for non-commercial use under a CC BY-NC 3.0 license.},
}
Markdown (Informal)
[The ManDi Corpus: A Spoken Corpus of Mandarin Regional Dialects](https://preview.aclanthology.org/jlcl-multiple-ingestion/2022.lrec-1.213/) (Zhao & Chodroff, LREC 2022)
ACL