% Workshop paper (WILDRE 2024), as exported from the ACL Anthology.
% The url field uses the canonical Anthology address for ID 2024.wildre-1.8
% instead of a temporary preview-branch build; proper nouns in the title are
% brace-protected as whole words so sentence-casing styles keep their capitals.
@inproceedings{dongare-2024-creating,
    title     = {Creating Corpus of Low Resource {Indian} Languages for Natural Language Processing: Challenges and Opportunities},
    author    = {Dongare, Pratibha},
    editor    = {Jha, Girish Nath and
                 L., Sobha and
                 Bali, Kalika and
                 Ojha, Atul Kr.},
    booktitle = {Proceedings of the 7th Workshop on Indian Language Data: Resources and Evaluation},
    month     = may,
    year      = {2024},
    address   = {Torino, Italia},
    publisher = {ELRA and ICCL},
    url       = {https://aclanthology.org/2024.wildre-1.8/},
    pages     = {54--58},
    abstract  = {Addressing tasks in Natural Language Processing requires access to sufficient and high-quality data. However, working with languages that have limited resources poses a significant challenge due to the absence of established methodologies, frameworks, and collaborative efforts. This paper intends to briefly outline the challenges associated with standardization in data creation, focusing on Indian languages, which are often categorized as low resource languages. Additionally, potential solutions and the importance of standardized procedures for low-resource language data are proposed. Furthermore, the critical role of standardized protocols in corpus creation and their impact on research is highlighted. Lastly, this paper concludes by defining what constitutes a corpus.},
}
Markdown (Informal)
[Creating Corpus of Low Resource Indian Languages for Natural Language Processing: Challenges and Opportunities](https://aclanthology.org/2024.wildre-1.8/) (Dongare, WILDRE 2024)
ACL