% Conference paper record (ACL Anthology ID 2025.arabicnlp-main.10).
% The url field uses the canonical Anthology address instead of a
% short-lived preview-branch deployment; the doi is the durable identifier.
@inproceedings{saeed-habash-2025-lemmatizing,
  title     = {Lemmatizing Dialectal {Arabic} with Sequence-to-Sequence Models},
  author    = {Saeed, Mostafa and
               Habash, Nizar},
  editor    = {Darwish, Kareem and
               Ali, Ahmed and
               Abu Farha, Ibrahim and
               Touileb, Samia and
               Zitouni, Imed and
               Abdelali, Ahmed and
               Al-Ghamdi, Sharefah and
               Alkhereyf, Sakhar and
               Zaghouani, Wajdi and
               Khalifa, Salam and
               AlKhamissi, Badr and
               Almatham, Rawan and
               Hamed, Injy and
               Alyafeai, Zaid and
               Alowisheq, Areeb and
               Inoue, Go and
               Mrini, Khalil and
               Alshammari, Waad},
  booktitle = {Proceedings of The Third Arabic Natural Language Processing Conference},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.arabicnlp-main.10/},
  doi       = {10.18653/v1/2025.arabicnlp-main.10},
  pages     = {117--129},
  isbn      = {979-8-89176-352-4},
  abstract  = {Lemmatization for dialectal Arabic poses many challenges due to the lack of orthographic standards and limited morphological analyzers. This work explores the effectiveness of Seq2Seq models for lemmatizing dialectal Arabic, both without analyzers and with their integration. We assess how well these models generalize across dialects and benefit from related varieties. Focusing on Egyptian, Gulf, and Levantine dialects with varying resource levels, our analysis highlights both the potential and limitations of data-driven approaches. The proposed method achieves significant gains over baselines, performing well in both low-resource and dialect-rich scenarios.},
}

Markdown (Informal)
[Lemmatizing Dialectal Arabic with Sequence-to-Sequence Models](https://aclanthology.org/2025.arabicnlp-main.10/) (Saeed & Habash, ArabicNLP 2025)
ACL