% Conference paper in the ACL 2026 long-papers volume (see booktitle).
% NOTE(review): the url field points at a preview.aclanthology.org ingest
% host; presumably it should be replaced by the canonical Anthology URL
% once the volume is published -- confirm before citing.
% The address field holds the location string as exported; it is kept
% unchanged.
@inproceedings{wali-nisioi-2026-automatic,
  author    = {Wali, Ahmad Mustapha and
               Nisioi, Sergiu},
  title     = {Automatic Correction of Writing Anomalies in {Hausa} Texts},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  pages     = {9514--9528},
  isbn      = {979-8-89176-390-6},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-long.430/},
  abstract  = {Hausa texts are often characterized by writing anomalies such as incorrect character substitutions and spacing errors, which sometimes hinder natural language processing (NLP) applications. This paper presents an approach to automatically correct the anomalies by finetuning transformer-based models. Using a corpus gathered from several public sources, we create a large-scale parallel dataset of over 400,000 noisy-clean Hausa sentence pairs by introducing synthetically generated noise to mimic realistic writing errors. Moreover, we finetune several multilingual and African language models, including M2M100, AfriTeVA, NCAIR1/N-ATLaS, UBC-NLP/cheetah-base, and other variants of BART and T5 for this correction task. Our experimental results demonstrate that models such as M2M100 achieve state-of-the-art results despite their smaller size and distinct pretraining, and that correcting errors can have a significant impact in improving downstream tasks such as text classification, machine translation, question answering, and LLM prompting in general. This research provides a methodology, a publicly available dataset, and a comparison of models to improve Hausa text quality, thereby advancing NLP capabilities for the language and offering transferable insights for other low-resource languages.},
}
Markdown (Informal)
[Automatic Correction of Writing Anomalies in Hausa Texts](https://preview.aclanthology.org/ingest-acl/2026.acl-long.430/) (Wali & Nisioi, ACL 2026)
ACL
- Ahmad Mustapha Wali and Sergiu Nisioi. 2026. Automatic Correction of Writing Anomalies in Hausa Texts. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 9514–9528, San Diego, California, United States. Association for Computational Linguistics.