@comment{
  Workshop paper record as exported from the ACL Anthology (ID 2026.chum-1.3).
  NOTE(review): the url field points at a preview.aclanthology.org ingest
  branch; presumably it should be replaced by the canonical aclanthology.org
  link once the volume is published -- confirm before citing.
}
@inproceedings{zaghouani-2026-arabic,
  author    = {Zaghouani, Wajdi},
  title     = {{Arabic} Humor as a Diagnostic Probe for Large Language Models},
  editor    = {Amir, Ori and
               Hempelmann, Christian F. and
               Rayz, Julia and
               Dong, Tiansi and
               Miller, Tristan},
  booktitle = {Proceedings of the 2nd Workshop on Computational Humor ({CHum} 2026)},
  month     = jul,
  year      = {2026},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.chum-1.3/},
  pages     = {39--50},
  isbn      = {979-8-89176-431-6},
  abstract  = {Arabic humor provides a challenging diagnostic test for large language models because interpreting jokes often requires pragmatic inference, sociolinguistic awareness, and culturally grounded knowledge that standard NLP benchmarks do not evaluate. Arabic is particularly suitable for probing these abilities given its diglossic structure and dialect diversity, where humor frequently arises from register contrast, dialect-specific vocabulary, and shared cultural references. We propose a three-layer taxonomy of Arabic humor mechanisms covering pragmatic, semantic, and sociolinguistic phenomena, illustrated through thirteen curated examples spanning Egyptian, Levantine, Gulf, Tunisian, and Iraqi Arabic. Building on this taxonomy, we introduce a diagnostic evaluation framework using contrastive minimal pairs, a multi-dimensional scoring rubric, and a cultural presupposition ontology. A small proof-of-concept probing study with GPT-4o, Gemini 2.0 Flash, and Claude Sonnet 4.5 reveals recurring failure patterns in sarcasm interpretation, register contrast reasoning, dialectal vocabulary coverage, and cultural grounding. We position this work as a diagnostic framework and pilot, not a mature benchmark, and outline a path toward larger annotated resources.},
}
Markdown (Informal)
[Arabic Humor as a Diagnostic Probe for Large Language Models](https://preview.aclanthology.org/ingest-acl-workshops/2026.chum-1.3/) (Zaghouani, CHum 2026)
ACL