% Long paper in the proceedings of the 19th EACL conference (2026); three authors, three volume editors.
% NOTE(review): url targets a preview.aclanthology.org ingestion build; presumably a permanent
%   aclanthology.org address replaces it after publication -- confirm and update.
% NOTE(review): address holds the value Rabat, Morocco, which looks like the conference venue
%   rather than the publisher's city -- confirm this is what the target style expects.
% NOTE(review): editor surname Marquez is unaccented while the given name carries an accent --
%   confirm the spelling against the proceedings front matter.
@inproceedings{gonzalez-etal-2026-tale,
  title     = {A Tale of Two Scripts: Transliteration and Post-Correction for {Judeo-Arabic}},
  author    = {Gonzalez, Juan Moreno and
               Alhafni, Bashar and
               Habash, Nizar},
  editor    = {Demberg, Vera and
               Inui, Kentaro and
               Marquez, Llu{\'i}s},
  booktitle = {Proceedings of the 19th Conference of the {European} Chapter of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-eacl/2026.eacl-long.93/},
  pages     = {2100--2113},
  isbn      = {979-8-89176-380-7},
  abstract  = {Judeo-Arabic refers to Arabic variants historically spoken by Jewish communities across the Arab world, primarily during the Middle Ages. Unlike standard Arabic, it is written in Hebrew script by Jewish writers and for Jewish audiences. Transliterating Judeo-Arabic into Arabic script is challenging due to ambiguous letter mappings, inconsistent orthographic conventions, and frequent code-switching into Hebrew. In this paper, we introduce a two-step approach to automatically transliterate Judeo-Arabic into Arabic script: simple character-level mapping followed by post-correction to address grammatical and orthographic errors. We also present the first benchmark evaluation of LLMs on this task. Finally, we show that transliteration enables Arabic NLP tools to perform morphosyntactic tagging and machine translation, which would have not been feasible on the original texts. We make our code and data publicly available.},
}
Markdown (Informal)
[A Tale of Two Scripts: Transliteration and Post-Correction for Judeo-Arabic](https://preview.aclanthology.org/ingest-eacl/2026.eacl-long.93/) (Gonzalez et al., EACL 2026)
ACL