@comment{Kashif-AI system description paper for the AbjadGenEval shared task (AbjadNLP 2026 workshop proceedings). Case-sensitive words in title and booktitle are brace-protected as whole words so sentence-casing styles leave them intact.}
@inproceedings{emad-eldin-2026-kashif,
  title     = {{Kashif-AI} at {AbjadGenEval} Shared Task: A Transformer-based Approach for {Arabic} {AI}-Generated Text Detection},
  author    = {Emad Eldin, Fatimah Mohamed},
  booktitle = {Proceedings of the 2nd Workshop on {NLP} for Languages Using {Arabic} Script},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/manual-author-scripts/2026.abjadnlp-1.60/},
  pages     = {483--488},
  abstract  = {As Large Language Models (LLMs) become increasingly proficient at generating human-like text, distinguishing between human-written and machine-generated content has become a critical challenge for information integrity. This paper presents Kashif-AI, a system developed for the AbjadGenEval Task 1: AI-Generated Arabic Text Detection. The approach leverages fine-tuned Arabic Pre-trained Language Models (PLMs), specifically MARBERT and CAMeLBERT, to classify news articles. A rigorous ablation study was conducted to evaluate the impact of data augmentation, comparing models trained on the official shared task data against those trained on a combined corpus of over 47,000 samples. While near-perfect performance was observed during validation, the blind test set evaluation revealed a significant generalization gap. Contrary to expectations, data augmentation resulted in performance degradation due to domain shifts. The best-performing configuration, which utilized CAMeLBERT-Mix trained on the original dataset, achieved an F1-score of 66.29{\%} and an Accuracy of 70.5{\%} on the blind test set.},
}
Markdown (Informal)
[Kashif-AI at AbjadGenEval Shared Task: A Transformer-based Approach for Arabic AI-Generated Text Detection](https://preview.aclanthology.org/manual-author-scripts/2026.abjadnlp-1.60/) (Emad Eldin, AbjadNLP 2026)
ACL