% ArabJobs corpus paper (El-Haj), in the ArabicNLP 2025 proceedings.
% NOTE(review): the url below uses a preview.aclanthology.org ingest path,
%   presumably a staging link -- confirm the canonical URL before release.
% NOTE(review): address holds "Suzhou, China", which looks like the conference
%   venue rather than the publisher's city -- kept exactly as exported.
@inproceedings{el-haj-2025-arabjobs,
  title     = {{ArabJobs}: A Multinational Corpus of {Arabic} Job Ads},
  author    = {El-Haj, Mo},
  editor    = {Darwish, Kareem and
               Ali, Ahmed and
               Abu Farha, Ibrahim and
               Touileb, Samia and
               Zitouni, Imed and
               Abdelali, Ahmed and
               Al-Ghamdi, Sharefah and
               Alkhereyf, Sakhar and
               Zaghouani, Wajdi and
               Khalifa, Salam and
               AlKhamissi, Badr and
               Almatham, Rawan and
               Hamed, Injy and
               Alyafeai, Zaid and
               Alowisheq, Areeb and
               Inoue, Go and
               Mrini, Khalil and
               Alshammari, Waad},
  booktitle = {Proceedings of The Third Arabic Natural Language Processing Conference},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-emnlp/2025.arabicnlp-main.2/},
  pages     = {16--25},
  isbn      = {979-8-89176-352-4},
  abstract  = {ArabJobs is a publicly available corpus of Arabic job advertisements collected from Egypt, Jordan, Saudi Arabia, and the United Arab Emirates. Comprising over 8,500 postings and more than 550,000 words, the dataset captures linguistic, regional, and socio-economic variation in the Arab labour market. We present analyses of gender representation and occupational structure, and highlight dialectal variation across ads, which offers opportunities for future research. We also demonstrate applications such as salary estimation and job category normalisation using large language models, alongside benchmark tasks for gender bias detection and profession classification. The findings show the utility of ArabJobs for fairness-aware Arabic NLP and labour market research. The dataset is publicly available on GitHub: https://github.com/drelhaj/ArabJobs.},
}
Markdown (Informal)
[ArabJobs: A Multinational Corpus of Arabic Job Ads](https://preview.aclanthology.org/ingest-emnlp/2025.arabicnlp-main.2/) (El-Haj, ArabicNLP 2025)
ACL