% Workshop paper from the LoResMT 2025 proceedings (ACL Anthology record).
% The url field uses the canonical Anthology address rather than a
% branch-preview build, and case-sensitive words in title/booktitle are
% brace-protected as whole words so sentence-casing styles keep them intact.
@inproceedings{gao-etal-2025-wenzhou,
  title     = {{Wenzhou} Dialect Speech to {Mandarin} Text Conversion},
  author    = {Gao, Zhipeng and
               Tamura, Akihiro and
               Kato, Tsuneo},
  editor    = {Ojha, Atul Kr. and
               Liu, Chao-hong and
               Vylomova, Ekaterina and
               Pirinen, Flammie and
               Washington, Jonathan and
               Oco, Nathaniel and
               Zhao, Xiaobing},
  booktitle = {Proceedings of the Eighth Workshop on Technologies for Machine Translation of Low-Resource Languages ({LoResMT} 2025)},
  month     = may,
  year      = {2025},
  address   = {Albuquerque, New Mexico, U.S.A.},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.loresmt-1.5/},
  pages     = {36--43},
  isbn      = {979-8-89176-230-5},
  abstract  = {The Wenzhou dialect is a Chinese dialect that is significantly distinct from Mandarin, the official language of China. It is among the most complex Chinese dialects and is nearly incomprehensible to people from regions such as Northern China, thereby creating substantial communication barriers. Therefore, the conversion between the Wenzhou dialect and Mandarin is essential to facilitate communication between Wenzhou dialect speakers and those from other Chinese regions. However, as a low-resource language, the Wenzhou dialect lacks publicly available datasets, and such conversion technologies have not been extensively researched. Thus, in this study, we create a parallel dataset containing Wenzhou dialect speech and the corresponding Mandarin text and build benchmark models for Wenzhou dialect speech-to-Mandarin text conversion. In particular, we fine-tune two self-supervised learning-based pretrained models, that is, TeleSpeech-ASR1.0 and Wav2Vec2-XLS-R, with our training dataset and report their performance on our test dataset as baselines for future research.},
}
Markdown (Informal)
[Wenzhou Dialect Speech to Mandarin Text Conversion](https://aclanthology.org/2025.loresmt-1.5/) (Gao et al., LoResMT 2025)
ACL
- Zhipeng Gao, Akihiro Tamura, and Tsuneo Kato. 2025. Wenzhou Dialect Speech to Mandarin Text Conversion. In Proceedings of the Eighth Workshop on Technologies for Machine Translation of Low-Resource Languages (LoResMT 2025), pages 36–43, Albuquerque, New Mexico, U.S.A. Association for Computational Linguistics.