@comment{
  WANLP 2019 workshop paper; ACL Anthology ID W19-4627 (matches the DOI suffix).
  The url field uses the canonical aclanthology.org address rather than a
  temporary preview.aclanthology.org ingestion-branch address.
}
@inproceedings{ghoul-lejeune-2019-michael,
    author    = {Ghoul, Dhaou and
                 Lejeune, Ga{\"e}l},
    title     = {{MICHAEL}: Mining Character-level Patterns for {Arabic} Dialect Identification ({MADAR} Challenge)},
    editor    = {El-Hajj, Wassim and
                 Belguith, Lamia Hadrich and
                 Bougares, Fethi and
                 Magdy, Walid and
                 Zitouni, Imed and
                 Tomeh, Nadi and
                 El-Haj, Mahmoud and
                 Zaghouani, Wajdi},
    booktitle = {Proceedings of the Fourth Arabic Natural Language Processing Workshop},
    month     = aug,
    year      = {2019},
    address   = {Florence, Italy},
    publisher = {Association for Computational Linguistics},
    pages     = {229--233},
    doi       = {10.18653/v1/W19-4627},
    url       = {https://aclanthology.org/W19-4627/},
    abstract  = {We present MICHAEL, a simple lightweight method for automatic Arabic Dialect Identification on the MADAR travel domain Dialect Identification (DID). MICHAEL uses simple character-level features in order to perform a pre-processing free classification. More precisely, Character N-grams extracted from the original sentences are used to train a Multinomial Naive Bayes classifier. This system achieved an official score (accuracy) of 53.25{\%} with 1{\ensuremath{<}}=N{\ensuremath{<}}=3 but showed a much better result with character 4-grams (62.17{\%} accuracy).},
}
Markdown (Informal)
[MICHAEL: Mining Character-level Patterns for Arabic Dialect Identification (MADAR Challenge)](https://aclanthology.org/W19-4627/) (Ghoul & Lejeune, WANLP 2019)
ACL