% AfricaNLP 2025 workshop paper (ACL Anthology ID 2025.africanlp-1.8).
% The url field uses the canonical Anthology address rather than a preview-branch link.
@inproceedings{yamoah-etal-2025-building,
  title     = {Building a Functional Machine Translation Corpus for {Kpelle}},
  author    = {Yamoah, Kweku Andoh and
               Weako, Jackson and
               Dorley, Emmanuel},
  editor    = {Lignos, Constantine and
               Abdulmumin, Idris and
               Adelani, David},
  booktitle = {Proceedings of the Sixth Workshop on African Natural Language Processing (AfricaNLP 2025)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.africanlp-1.8/},
  pages     = {52--63},
  isbn      = {979-8-89176-257-2},
  abstract  = {In this paper, we introduce the first publicly available English-Kpelle dataset for machine translation, comprising over 2,000 sentence pairs drawn from everyday communication, religious texts, and educational materials. By fine-tuning Meta's No Language Left Behind (NLLB) model on two versions of the dataset, we achieved BLEU scores of up to 30 in the Kpelle-to-English direction, demonstrating the benefits of data augmentation. Our findings align with NLLB-200 benchmarks on other African languages, underscoring Kpelle's potential for competitive performance despite its low-resource status. Beyond machine translation, this dataset enables broader NLP tasks, including speech recognition and language modeling. We conclude with a roadmap for future dataset expansion, emphasizing orthographic consistency, community-driven validation, and interdisciplinary collaboration to advance inclusive language technology development for Kpelle and other low-resourced Mande languages.},
}
Markdown (Informal)
[Building a Functional Machine Translation Corpus for Kpelle](https://aclanthology.org/2025.africanlp-1.8/) (Yamoah et al., AfricaNLP 2025)
ACL