% ShUD: Universal Dependencies treebank for Shanghainese (workshop paper,
% UDW at SyntaxFest 2025). Whole words that must keep their capitalisation
% under sentence-casing styles are brace-protected in the title.
@inproceedings{yang-2025-shud,
  author    = {Yang, Qizhen},
  title     = {{ShUD}: the First {Shanghainese} {Universal} {Dependency} Treebank},
  editor    = {Bouma, Gosse and
               {\c{C}}{\"o}ltekin, {\c{C}}a{\u{g}}r{\i}},
  booktitle = {Proceedings of the Eighth Workshop on Universal Dependencies ({UDW}, {SyntaxFest} 2025)},
  month     = aug,
  year      = {2025},
  address   = {Ljubljana, Slovenia},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.udw-1.20/},
  pages     = {186--193},
  isbn      = {979-8-89176-292-3},
  abstract  = {This paper introduces ShUD, the first Universal Dependencies (UD) treebank for Shanghainese, a Wu Chinese variant spoken by approximately 14 million people but severely under-resourced in NLP. The treebank is built through a scalable annotation pipeline that exploits grammatical parallels between Shanghainese and Mandarin. Our pipeline also provides a practical strategy for bootstrapping resources for other Chinese dialects. We documented syntactic phenomena unique to Shanghainese within the UD framework and fine-tuned a dependency parser using our annotated treebank, contributing a foundation to both NLP tool development and cross-linguistic syntactic research.},
}
Markdown (Informal)
[ShUD: the First Shanghainese Universal Dependency Treebank](https://aclanthology.org/2025.udw-1.20/) (Yang, UDW-SyntaxFest 2025)
ACL