% ArGAN dataset paper, published in the GeBNLP 2025 workshop proceedings (ACL).
% The url field uses the canonical ACL Anthology permalink rather than a preview-branch link.
@inproceedings{aly-etal-2025-argan,
  title     = {{ArGAN}: {Arabic} Gender, Ability, and Nationality Dataset for Evaluating Biases in Large Language Models},
  author    = {Aly, Ranwa and
               Allam, Yara and
               Gaber, Rana and
               Basta, Christine},
  editor    = {Fale{\'n}ska, Agnieszka and
               Basta, Christine and
               Costa-juss{\`a}, Marta and
               Sta{\'n}czak, Karolina and
               Nozza, Debora},
  booktitle = {Proceedings of the 6th Workshop on Gender Bias in Natural Language Processing (GeBNLP)},
  month     = aug,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.gebnlp-1.23/},
  pages     = {256--267},
  isbn      = {979-8-89176-277-0},
  abstract  = {Large language models (LLMs) are pretrained on substantial, unfiltered corpora, assembled from a variety of sources. This risks inheriting the deep-rooted biases that exist within them, both implicit and explicit. This is even more apparent in low-resource languages, where corpora may be prioritized by quantity over quality, potentially leading to more unchecked biases. More specifically, we address the biases present in the Arabic language in both general-purpose and Arabic-specialized architectures in three dimensions of demographics: gender, ability, and nationality. To properly assess the fairness of these models, we experiment with bias-revealing prompts and estimate the performance using existing evaluation metrics, and propose adaptations to others.},
}
Markdown (Informal)
[ArGAN: Arabic Gender, Ability, and Nationality Dataset for Evaluating Biases in Large Language Models](https://aclanthology.org/2025.gebnlp-1.23/) (Aly et al., GeBNLP 2025)
ACL