@comment{Shared-task system paper from the Fifth Arabic Natural Language Processing Workshop (2020); the abstract uses a BibTeX special character for the accent in "Naive".}
@inproceedings{alshenaifi-azmi-2020-faheem,
  title     = {Faheem at {NADI} shared task: Identifying the dialect of {Arabic} tweet},
  author    = {AlShenaifi, Nouf and
               Azmi, Aqil},
  editor    = {Zitouni, Imed and
               Abdul-Mageed, Muhammad and
               Bouamor, Houda and
               Bougares, Fethi and
               El-Haj, Mahmoud and
               Tomeh, Nadi and
               Zaghouani, Wajdi},
  booktitle = {Proceedings of the Fifth Arabic Natural Language Processing Workshop},
  month     = dec,
  year      = {2020},
  address   = {Barcelona, Spain (Online)},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/jlcl-multiple-ingestion/2020.wanlp-1.29/},
  pages     = {282--287},
  abstract  = {This paper describes Faheem (adj. of understand), our submission to NADI (Nuanced Arabic Dialect Identification) shared task. With so many Arabic dialects being under-studied due to the scarcity of the resources, the objective is to identify the Arabic dialect used in the tweet, country wise. We propose a machine learning approach where we utilize word-level n-gram (n = 1 to 3) and tf-idf features and feed them to six different classifiers. We train the system using a data set of 21,000 tweets{---}provided by the organizers{---}covering twenty-one Arab countries. Our top performing classifiers are: Logistic Regression, Support Vector Machines, and Multinomial Na{\"\i}ve Bayes.}
}
Markdown (Informal)
[Faheem at NADI shared task: Identifying the dialect of Arabic tweet](https://preview.aclanthology.org/jlcl-multiple-ingestion/2020.wanlp-1.29/) (AlShenaifi & Azmi, WANLP 2020)
ACL