% Conference paper from the Fourth Arabic Natural Language Processing Workshop.
% `url` is the canonical ACL Anthology landing page for paper W19-4632 (same ID
% as the DOI); preview-branch URLs are temporary builds and should not be cited.
% Case-sensitive words in `title` are protected with whole-word braces.
% `abstract` is kept verbatim as exported.
@inproceedings{abu-kwaik-saad-2019-arbdialectid,
  title     = {{ArbDialectID} at {MADAR} Shared Task 1: Language Modelling and Ensemble Learning for Fine Grained {Arabic} Dialect Identification},
  author    = {Abu Kwaik, Kathrein and
               Saad, Motaz},
  editor    = {El-Hajj, Wassim and
               Belguith, Lamia Hadrich and
               Bougares, Fethi and
               Magdy, Walid and
               Zitouni, Imed and
               Tomeh, Nadi and
               El-Haj, Mahmoud and
               Zaghouani, Wajdi},
  booktitle = {Proceedings of the Fourth Arabic Natural Language Processing Workshop},
  month     = aug,
  year      = {2019},
  address   = {Florence, Italy},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/W19-4632/},
  doi       = {10.18653/v1/W19-4632},
  pages     = {254--258},
  abstract  = {In this paper, we present a Dialect Identification system (ArbDialectID) that competed at Task 1 of the MADAR shared task, MADARTravel Domain Dialect Identification. We build a course and a fine-grained identification model to predict the label (corresponding to a dialect of Arabic) of a given text. We build two language models by extracting features at two levels (words and characters). We firstly build a coarse identification model to classify each sentence into one out of six dialects, then use this label as a feature for the fine-grained model that classifies the sentence among 26 dialects from different Arab cities, after that we apply ensemble voting classifier on both sub-systems. Our system ranked 1st that achieving an f-score of 67.32{\%}. Both the models and our feature engineering tools are made available to the research community.},
}
Markdown (Informal)
[ArbDialectID at MADAR Shared Task 1: Language Modelling and Ensemble Learning for Fine Grained Arabic Dialect Identification](https://aclanthology.org/W19-4632/) (Abu Kwaik & Saad, WANLP 2019)
ACL