@comment{
  Workshop paper by Shehadi and Wintner in the WANLP 2022 proceedings.
  The url field holds the permanent ACL Anthology address (its identifier
  matches the DOI suffix) rather than a temporary preview-branch link.
  In the title, the whole word Arabizi is braced so that sentence-casing
  styles keep its capital letter.
}
@inproceedings{shehadi-wintner-2022-identifying,
  title     = {Identifying Code-switching in {Arabizi}},
  author    = {Shehadi, Safaa and
               Wintner, Shuly},
  editor    = {Bouamor, Houda and
               Al-Khalifa, Hend and
               Darwish, Kareem and
               Rambow, Owen and
               Bougares, Fethi and
               Abdelali, Ahmed and
               Tomeh, Nadi and
               Khalifa, Salam and
               Zaghouani, Wajdi},
  booktitle = {Proceedings of the Seventh Arabic Natural Language Processing Workshop (WANLP)},
  month     = dec,
  year      = {2022},
  address   = {Abu Dhabi, United Arab Emirates (Hybrid)},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.wanlp-1.18/},
  doi       = {10.18653/v1/2022.wanlp-1.18},
  pages     = {194--204},
  abstract  = {We describe a corpus of social media posts that include utterances in Arabizi, a Roman-script rendering of Arabic, mixed with other languages, notably English, French, and Arabic written in the Arabic script. We manually annotated a subset of the texts with word-level language IDs; this is a non-trivial task due to the nature of mixed-language writing, especially on social media. We developed classifiers that can accurately predict the language ID tags. Then, we extended the word-level predictions to identify sentences that include Arabizi (and code-switching), and applied the classifiers to the raw corpus, thereby harvesting a large number of additional instances. The result is a large-scale dataset of Arabizi, with precise indications of code-switching between Arabizi and English, French, and Arabic.},
}
Markdown (Informal)
[Identifying Code-switching in Arabizi](https://aclanthology.org/2022.wanlp-1.18/) (Shehadi & Wintner, WANLP 2022)
ACL
- Safaa Shehadi and Shuly Wintner. 2022. Identifying Code-switching in Arabizi. In Proceedings of the Seventh Arabic Natural Language Processing Workshop (WANLP), pages 194–204, Abu Dhabi, United Arab Emirates (Hybrid). Association for Computational Linguistics.