% Conference paper from the LREC 2026 main volume (anthology id 2026.lrec-main.912).
% Typed as inproceedings so that the proceedings editors and the venue are
% rendered; the standard styles ignore the editor field on article entries.
% NOTE(review): booktitle reuses the venue name found in the scraped record; the
% full proceedings title is not visible here -- confirm against the anthology.
% NOTE(review): url points at a preview/ingest host -- confirm the permanent link
% before citing.
@inproceedings{matlatipov-aripov-2026-uzudt,
  title = "{UzUDT}: {Uzbek} {Universal} {Dependencies} Treebank",
  author = "Matlatipov, Sanatbek Gayratovich and
    Aripov, Mersaid",
  editor = "Piperidis, Stelios and
    Bel, N{\'u}ria and
    van den Heuvel, Henk and
    Ide, Nancy and
    Krek, Simon and
    Toral, Antonio",
  booktitle = "International Conference on Language Resources and Evaluation",
  anthology-volume = "main",
  month = may,
  year = "2026",
  address = "Palma de Mallorca, Spain",
  publisher = "ELRA Language Resource Association",
  url = "https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.912/",
  pages = "11642--11649",
  abstract = "In this paper, we present a new Universal Dependencies treebank for Uzbek language(UzUDT) developed as a gold-standard resource with full manual annotation. The treebank includes 684 sentences (7,582 tokens) from Uzbek literary texts, and is larger and more domain-diverse than the existing Uzbek UD treebank. The corpus was developed through rigorous multi-annotator adjudication, achieving very high inter-annotator agreement (multi-rater agreement coefficients {\ensuremath{>}}0.90) across lemmatization, PoS tagging, and morphological features. Alongside comprehensive corpus profiling, we establish robust computational baselines by evaluating graph-based (Stanza) and transition-based (spaCy) parsing architectures using both static and monolingual contextual embeddings. Our evaluations reveal a critical architectural trade-off for low-resource agglutinative parsing: joint transition-based models excel at morphosyntactic tagging, whereas graph-based models remain strictly superior for resolving complex structural dependencies. Furthermore, we demonstrate that cross-treebank data augmentation yields substantial, synergistic accuracy gains. The resource provides a much-needed high-quality treebank for Uzbek to assist in developing better NLP tools and to enable linguistic research in the low-resource language"
}
Markdown (Informal)
[UzUDT: Uzbek Universal Dependencies Treebank](https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.912/) (Matlatipov & Aripov, LREC 2026)
ACL