% Workshop paper record (ACL Anthology ID 2026.abjadnlp-1.56).
% The url field uses the permanent Anthology address instead of a preview-branch build.
@inproceedings{hendi-2026-xlmr,
    author    = {Hendi, Mohannad Mohammad},
    title     = {{XLMR}-{Urdu} at {AbjadGenEval} Shared Task: A Data-Centric Transformer-Based Approach for {AI}-Generated {Urdu} Text Detection},
    booktitle = {Proceedings of the 2nd Workshop on {NLP} for Languages Using {Arabic} Script},
    month     = mar,
    year      = {2026},
    address   = {Rabat, Morocco},
    publisher = {Association for Computational Linguistics},
    pages     = {461--467},
    url       = {https://aclanthology.org/2026.abjadnlp-1.56/},
    abstract  = {The rapid advancement of large language models (LLMs) has led to a substantial increase in automatically generated textual content, raising concerns regarding misinformation, plagiarism, and authorship verification. These challenges are particularly pronounced for low-resource languages such as Urdu, where limited annotated data and complex linguistic properties hinder robust detection. In this paper, we present a transformer-based approach for binary classification of human-written versus AI-generated Urdu text, developed for the AbjadGenEval Task 2 shared task. Beyond model fine-tuning, we adopt a data-centric perspective, emphasizing dataset diagnostics, document-level inference, and calibration strategies. Our system achieves strong performance on the official test set, with an F1-score of 88.68{\%} and balanced accuracy of 88.71{\%}. Through empirical analysis, we demonstrate that dataset characteristics and generator-specific artifacts play a dominant role in model generalization, highlighting critical directions for future research in low-resource AI-generated text detection.},
}
Markdown (Informal)
[XLMR-Urdu at AbjadGenEval Shared Task: A Data-Centric Transformer-Based Approach for AI-Generated Urdu Text Detection](https://aclanthology.org/2026.abjadnlp-1.56/) (Hendi, AbjadNLP 2026)
ACL