@comment{Workshop paper (WILDRE5, 2020) on the Bhojpuri Universal Dependencies treebank.
  The url field uses the canonical ACL Anthology address, not a temporary preview build.}
@inproceedings{ojha-zeman-2020-universal,
  title     = {{Universal} {Dependency} Treebanks for {Low-Resource} {Indian} Languages: The Case of {Bhojpuri}},
  author    = {Ojha, Atul Kr. and
               Zeman, Daniel},
  editor    = {Jha, Girish Nath and
               Bali, Kalika and
               L., Sobha and
               Agrawal, S. S. and
               Ojha, Atul Kr.},
  booktitle = {Proceedings of the WILDRE5{--} 5th Workshop on Indian Language Data: Resources and Evaluation},
  month     = may,
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association (ELRA)},
  url       = {https://aclanthology.org/2020.wildre-1.7/},
  pages     = {33--38},
  language  = {eng},
  isbn      = {979-10-95546-67-2},
  abstract  = {This paper presents the first dependency treebank for Bhojpuri, a resource-poor language that belongs to the Indo-Aryan language family. The objective behind the Bhojpuri Treebank (BHTB) project is to create a substantial, syntactically annotated treebank which not only acts as a valuable resource in building language technological tools, also helps in cross-lingual learning and typological research. Currently, the treebank consists of 4,881 annotated tokens in accordance with the annotation scheme of Universal Dependencies (UD). A Bhojpuri tagger and parser were created using machine learning approach. The accuracy of the model is 57.49{\%} UAS, 45.50{\%} LAS, 79.69{\%} UPOS accuracy and 77.64{\%} XPOS accuracy. The paper describes the details of the project including a discussion on linguistic analysis and annotation process of the Bhojpuri UD treebank.},
}
Markdown (Informal)
[Universal Dependency Treebanks for Low-Resource Indian Languages: The Case of Bhojpuri](https://aclanthology.org/2020.wildre-1.7/) (Ojha & Zeman, WILDRE 2020)
ACL