@inproceedings{alzubi-reynolds-2026-transformer,
  title     = {Transformer-based readability classifiers are worse than you think: Evidence from cross-domain {Arabic} readability assessment},
  author    = {Alzu{'}Bi, Sarh and
               Reynolds, Robert},
  editor    = {Kochmar, Ekaterina and
               Alhafni, Bashar and
               Bann{\`o}, Stefano and
               Bexte, Marie and
               Burstein, Jill and
               Horbach, Andrea and
               Laarmann-Quante, Ronja and
               Tack, Ana{\"i}s and
               Yaneva, Victoria and
               Yuan, Zheng},
  booktitle = {Proceedings of the 21st Workshop on Innovative Use of {NLP} for Building Educational Applications ({BEA} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.bea-1.52/},
  pages     = {766--776},
  isbn      = {979-8-89176-409-5},
  abstract  = {Arabic readability assessment is under-explored compared to English, and existing models are typically evaluated only within the training domain. We introduce the Jordanian School Textbook Corpus (JSTC), 82,512 segments from 240 textbooks spanning grades 1{--}12, and combine it with DARES to train XGBoost classifiers, fine-tuned CAMeLBERT transformers, and hybrid architectures evaluated both in-domain and on the BAREC out-of-domain benchmark. CAMeLBERT achieves strong in-domain performance (QWK = 0.830) but its cross-domain QWK collapses to 0.085, while XGBoost over 127 handcrafted linguistic features alone maintains the highest cross-domain QWK (0.240); adding [CLS] embeddings to those features actively harms transfer. Probing reveals that CAMeLBERT layers implicitly capture some linguistic features but higher-level signals overwhelm them, and Captum attribution identifies nouns and nominal particles such as al- as the most important tokens. The results argue for prioritizing linguistically-grounded features over contextual embeddings when cross-domain robustness is required.},
}

Markdown (Informal)
[Transformer-based readability classifiers are worse than you think: Evidence from cross-domain Arabic readability assessment](https://aclanthology.org/2026.bea-1.52/) (Alzu’Bi & Reynolds, BEA 2026)
ACL