% Conference paper from the Fifth Arabic Natural Language Processing Workshop
% (WANLP 2020): the ArabicProcessors system for NADI 2020 Subtasks 1 and 2.
% The url field uses the canonical ACL Anthology address, not a preview branch.
@inproceedings{gaanoun-benelallam-2020-arabic,
  title     = {{Arabic} dialect identification: An {Arabic-BERT} model with data augmentation and ensembling strategy},
  author    = {Gaanoun, Kamel and
               Benelallam, Imade},
  editor    = {Zitouni, Imed and
               Abdul-Mageed, Muhammad and
               Bouamor, Houda and
               Bougares, Fethi and
               El-Haj, Mahmoud and
               Tomeh, Nadi and
               Zaghouani, Wajdi},
  booktitle = {Proceedings of the Fifth Arabic Natural Language Processing Workshop},
  month     = dec,
  year      = {2020},
  address   = {Barcelona, Spain (Online)},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2020.wanlp-1.28/},
  pages     = {275--281},
  abstract  = {This paper presents the ArabicProcessors team's deep learning system designed for the NADI 2020 Subtask 1 (country-level dialect identification) and Subtask 2 (province-level dialect identification). We used Arabic-Bert in combination with data augmentation and ensembling methods. Unlabeled data provided by task organizers (10 Million tweets) was split into multiple subparts, to which we applied semi-supervised learning method, and finally ran a specific ensembling process on the resulting models. This system ranked 3rd in Subtask 1 with 23.26{\%} F1-score and 2nd in Subtask 2 with 5.75{\%} F1-score.},
}
Markdown (Informal)
[Arabic dialect identification: An Arabic-BERT model with data augmentation and ensembling strategy](https://aclanthology.org/2020.wanlp-1.28/) (Gaanoun & Benelallam, WANLP 2020)
ACL