% COLING 2025 main-conference paper (ACL Anthology ID 2025.coling-main.186).
% All names use the "Last, First" form so that multi-word surnames
% (e.g. "Di Eugenio") are parsed, sorted, and abbreviated correctly.
% The address field carries the conference location, following ACL Anthology convention.
@inproceedings{mohamed-al-azani-2025-enhancing,
  title     = {Enhancing {Arabic} {NLP} Tasks through Character-Level Models and Data Augmentation},
  author    = {Mohamed, Mohanad and
               Al-Azani, Sadam},
  editor    = {Rambow, Owen and
               Wanner, Leo and
               Apidianaki, Marianna and
               Al-Khalifa, Hend and
               Di Eugenio, Barbara and
               Schockaert, Steven},
  booktitle = {Proceedings of the 31st International Conference on Computational Linguistics},
  month     = jan,
  year      = {2025},
  address   = {Abu Dhabi, UAE},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.coling-main.186/},
  pages     = {2744--2757},
  abstract  = {This study introduces a character-level approach specifically designed for Arabic NLP tasks, offering a novel and highly effective solution to the unique challenges inherent in Arabic language processing. It presents a thorough comparative study of various character-level models, including Convolutional Neural Networks (CNNs), pre-trained transformers (CANINE), and Bidirectional Long Short-Term Memory networks (BiLSTMs), assessing their performance and exploring the impact of different data augmentation techniques on enhancing their effectiveness. Additionally, it introduces two innovative Arabic-specific data augmentation methods{---}vowel deletion and style transfer{---}and rigorously evaluates their effectiveness. The proposed approach was evaluated on Arabic privacy policy classification task as a case study, demonstrating significant improvements in model performance, reporting a micro-averaged F1-score of 93.8{\%}, surpassing state-of-the-art models.},
}
Markdown (Informal)
[Enhancing Arabic NLP Tasks through Character-Level Models and Data Augmentation](https://aclanthology.org/2025.coling-main.186/) (Mohamed & Al-Azani, COLING 2025)
ACL