@comment{
  ACL Anthology entry W19-4627 (WANLP 2019).
  Title acronyms and proper nouns are brace-protected as whole words so that
  sentence-casing styles keep their capitals. The url field holds the
  canonical Anthology address, not a temporary preview-branch link.
}
@inproceedings{ghoul-lejeune-2019-michael,
  title     = {{MICHAEL}: Mining Character-level Patterns for {Arabic} Dialect Identification ({MADAR} Challenge)},
  author    = {Ghoul, Dhaou and
               Lejeune, Ga{\"e}l},
  editor    = {El-Hajj, Wassim and
               Belguith, Lamia Hadrich and
               Bougares, Fethi and
               Magdy, Walid and
               Zitouni, Imed and
               Tomeh, Nadi and
               El-Haj, Mahmoud and
               Zaghouani, Wajdi},
  booktitle = {Proceedings of the Fourth Arabic Natural Language Processing Workshop},
  month     = aug,
  year      = {2019},
  address   = {Florence, Italy},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/W19-4627/},
  doi       = {10.18653/v1/W19-4627},
  pages     = {229--233},
  abstract  = {We present MICHAEL, a simple lightweight method for automatic Arabic Dialect Identification on the MADAR travel domain Dialect Identification (DID). MICHAEL uses simple character-level features in order to perform a pre-processing free classification. More precisely, Character N-grams extracted from the original sentences are used to train a Multinomial Naive Bayes classifier. This system achieved an official score (accuracy) of 53.25{\%} with 1{\ensuremath{<}}=N{\ensuremath{<}}=3 but showed a much better result with character 4-grams (62.17{\%} accuracy).},
}
Markdown (Informal)
[MICHAEL: Mining Character-level Patterns for Arabic Dialect Identification (MADAR Challenge)](https://preview.aclanthology.org/jlcl-multiple-ingestion/W19-4627/) (Ghoul & Lejeune, WANLP 2019)
ACL