% VarDial 2024 workshop paper, ACL Anthology ID 2024.vardial-1.4.
% The url field holds the canonical aclanthology.org address (same ID as the DOI suffix).
% NOTE(review): address holds the conference venue, as in ACL Anthology exports, not the publisher's city.
@inproceedings{mohamed-eida-etal-2024-well,
  title     = {How Well Do Tweets Represent Sub-Dialects of {Egyptian} {Arabic}?},
  author    = {Mohamed Eida, Mai and
               Nassar, Mayar and
               Dunn, Jonathan},
  editor    = {Scherrer, Yves and
               Jauhiainen, Tommi and
               Ljube{\v{s}}i{\'c}, Nikola and
               Zampieri, Marcos and
               Nakov, Preslav and
               Tiedemann, J{\"o}rg},
  booktitle = {Proceedings of the Eleventh Workshop on NLP for Similar Languages, Varieties, and Dialects (VarDial 2024)},
  month     = jun,
  year      = {2024},
  address   = {Mexico City, Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.vardial-1.4/},
  doi       = {10.18653/v1/2024.vardial-1.4},
  pages     = {41--55},
  abstract  = {How well does naturally-occurring digital text, such as Tweets, represent sub-dialects of Egyptian Arabic (EA)? This paper focuses on two EA sub-dialects: Cairene Egyptian Arabic (CEA) and Sa{'}idi Egyptian Arabic (SEA). We use morphological markers from ground-truth dialect surveys as a distance measure across four geo-referenced datasets. Results show that CEA markers are prevalent as expected in CEA geo-referenced tweets, while SEA markers are limited across SEA geo-referenced tweets. SEA tweets instead show a prevalence of CEA markers and higher usage of Modern Standard Arabic. We conclude that corpora intended to represent sub-dialects of EA do not accurately represent sub-dialects outside of the Cairene variety. This finding calls into question the validity of relying on tweets alone to represent dialectal differences.},
}
Markdown (Informal)
[How Well Do Tweets Represent Sub-Dialects of Egyptian Arabic?](https://aclanthology.org/2024.vardial-1.4/) (Mohamed Eida et al., VarDial 2024)
ACL
- Mai Mohamed Eida, Mayar Nassar, and Jonathan Dunn. 2024. How Well Do Tweets Represent Sub-Dialects of Egyptian Arabic? In Proceedings of the Eleventh Workshop on NLP for Similar Languages, Varieties, and Dialects (VarDial 2024), pages 41–55, Mexico City, Mexico. Association for Computational Linguistics.