% ACL Anthology record 2025.africanlp-1.18 (paper in the AfricaNLP 2025 workshop proceedings).
% The url field uses the permanent aclanthology.org address built from the Anthology ID;
% the exported link pointed at an ingestion-preview host, which appears to be a temporary staging build.
% Case-sensitive words in title/booktitle are brace-protected as whole words.
@inproceedings{moteu-ngoli-etal-2025-challenges,
    title     = {Challenges and Limitations in Gathering Resources for Low-Resource Languages: The Case of {Medumba}},
    author    = {Moteu Ngoli, Tatiana and
                 Christabel, Mbuh and
                 Yopa, Njeunga},
    editor    = {Lignos, Constantine and
                 Abdulmumin, Idris and
                 Adelani, David},
    booktitle = {Proceedings of the Sixth Workshop on {African} Natural Language Processing ({AfricaNLP} 2025)},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.africanlp-1.18/},
    pages     = {136--142},
    isbn      = {979-8-89176-257-2},
    abstract  = {Low-resource languages face significant challenges in natural language processing due to the scarcity of annotated data, linguistic resources, and the lack of language standardization, which leads to variations in grammar, vocabulary, and writing systems. This issue is particularly observed in many African languages, which significantly reduces their usability. To bridge this barrier, this paper investigates the challenges and limitations of collecting datasets for the Medumba language, a Grassfields Bantu language spoken in Cameroon, in the context of extremely low-resource natural language processing. We mainly focus on the specificity of this language, including its grammatical and lexical structure. Our findings highlight key barriers, including (1) the challenges in typing and encoding Latin scripts, (2) the absence of standardized translations for technical and scientific terms, and (3) the challenge of limited digital resources and financial constraints, highlighting the need to improve data strategies and collaboration to advance computational research on African languages. We hope that our study informs the development of better tools and policies to make knowledge platforms more accessible to extremely low-resource language speakers. We further discuss the representation of the language, data collection, parallel corpus development.},
}
Markdown (Informal)
[Challenges and Limitations in Gathering Resources for Low-Resource Languages: The Case of Medumba](https://aclanthology.org/2025.africanlp-1.18/) (Moteu Ngoli et al., AfricaNLP 2025)
ACL