% Conference paper from the ACL Anthology export (symposium proceedings, 2026).
% Proper nouns in the title are brace-protected as whole words so that
% sentence-casing bibliography styles keep their capitals.
% NOTE(review): the url field points at a preview.aclanthology.org ingest
% host -- confirm the canonical Anthology URL (and add a DOI if one exists).
@inproceedings{zbrze-z-ny-etal-2026-arabic,
  title     = {The {Arabic} {Bible} as an Evaluation Tool: The Case Study of the {Khal{\={i}}l{\={i}}} {Arabic} Dialect},
  author    = {Zbrze{\.z}ny, Jakub and
               Reiter, Ehud and
               Zhao, Wei},
  editor    = {Mahamood, Saad and
               Howcroft, David M. and
               van Deemter, Kees and
               Balloccu, Simone and
               Sivaprasad, Adarsa and
               Sundararajan, Barkavi and
               Bugar{\'i}n Diz, Alberto and
               Alonso-Moral, Jose Mar{\'i}a},
  booktitle = {Proceedings of the 1st Symposium on Natural Language Generation Evaluations},
  month     = jun,
  year      = {2026},
  address   = {Aberdeen, United Kingdom},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-retroeval/2026.retroeval-main.4/},
  pages     = {24--32},
  isbn      = {979-8-89176-436-1},
  abstract  = {The paper presents a fully documented case study of how high-quality data combined with evaluators' expertise can be utilised for conducting basic NLP experiments in the realm of low-resource languages such as local varieties of Colloquial Arabic, and how the Arabic Bible, hitherto underutilised in NLP, can serve as an evaluation tool. Our experiments on one of the rural Palestinian Arabic dialects of al-Khal{\={i}}l / Hebron illustrate two points. On the one hand, popular models are clearly limited in their ability to produce outputs of a high level of dialectal specificity (here: rural area surrounding a major urban centre). On the other hand, they are capable to generate accurate translations from such dialects into Modern Standard Arabic. Thus, the models appear better at understanding dialects than at producing dialects.},
}

Markdown (Informal)
[The Arabic Bible as an Evaluation Tool: The Case Study of the Khalīlī Arabic Dialect](https://preview.aclanthology.org/ingest-retroeval/2026.retroeval-main.4/) (Zbrzeżny et al., RetroEval 2026)
ACL