% NADI 2022 shared-task system paper from the WANLP 2022 proceedings (ACL Anthology record 2022.wanlp-1.43).
@inproceedings{bayrak-issifu-2022-domain,
  title     = {Domain-Adapted {BERT}-based Models for Nuanced {Arabic} Dialect Identification and Tweet Sentiment Analysis},
  author    = {Bayrak, Giyaseddin and
               Issifu, Abdul Majeed},
  editor    = {Bouamor, Houda and
               Al-Khalifa, Hend and
               Darwish, Kareem and
               Rambow, Owen and
               Bougares, Fethi and
               Abdelali, Ahmed and
               Tomeh, Nadi and
               Khalifa, Salam and
               Zaghouani, Wajdi},
  booktitle = {Proceedings of the Seventh Arabic Natural Language Processing Workshop (WANLP)},
  month     = dec,
  year      = {2022},
  address   = {Abu Dhabi, United Arab Emirates (Hybrid)},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.wanlp-1.43/},
  doi       = {10.18653/v1/2022.wanlp-1.43},
  pages     = {425--430},
  abstract  = {This paper summarizes the solution of the Nuanced Arabic Dialect Identification (NADI) 2022 shared task. It consists of two subtasks: a country-level Arabic Dialect Identification (ADID) and an Arabic Sentiment Analysis (ASA). Our work shows the importance of using domain-adapted models and language-specific pre-processing in NLP task solutions. We implement a simple but strong baseline technique to increase the stability of fine-tuning settings to obtain a good generalization of models. Our best model for the Dialect Identification subtask achieves a Macro F-1 score of 25.54{\%} as an average of both Test-A (33.89{\%}) and Test-B (19.19{\%}) F-1 scores. We also obtained a Macro F-1 score of 74.29{\%} of positive and negative sentiments only, in the Sentiment Analysis task.},
}
Markdown (Informal)
[Domain-Adapted BERT-based Models for Nuanced Arabic Dialect Identification and Tweet Sentiment Analysis](https://aclanthology.org/2022.wanlp-1.43/) (Bayrak & Issifu, WANLP 2022)
ACL