% ArabicNLP 2023 conference paper (ACL Anthology record). The url field uses the
% canonical Anthology address rather than a temporary preview-build link.
@inproceedings{albared-etal-2023-arabic,
    title     = {{Arabic} Topic Classification in the Generative and {AutoML} Era},
    author    = {Albared, Doha and
                 Hamoud, Hadi and
                 Zaraket, Fadi},
    editor    = {Sawaf, Hassan and
                 El-Beltagy, Samhaa and
                 Zaghouani, Wajdi and
                 Magdy, Walid and
                 Abdelali, Ahmed and
                 Tomeh, Nadi and
                 Abu Farha, Ibrahim and
                 Habash, Nizar and
                 Khalifa, Salam and
                 Keleg, Amr and
                 Haddad, Hatem and
                 Zitouni, Imed and
                 Mrini, Khalil and
                 Almatham, Rawan},
    booktitle = {Proceedings of {ArabicNLP} 2023},
    month     = dec,
    year      = {2023},
    address   = {Singapore (Hybrid)},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2023.arabicnlp-1.32/},
    doi       = {10.18653/v1/2023.arabicnlp-1.32},
    pages     = {399--404},
    abstract  = {Most recent models for Arabic topic classification leveraged fine-tuning existing pre-trained transformer models and targeted a limited number of categories. More recently, advances in automated ML and generative models introduced novel potentials for the task. While these approaches work for English, it is a question of whether they perform well for low-resourced languages; Arabic in particular. This paper presents (i) ArBoNeClass; a novel Arabic dataset with an extended 14-topic class set covering modern books from social sciences and humanities along with newspaper articles, and (ii) a set of topic classifiers built from it. We finetuned an open LLM model to build ArGTClass. We compared its performance against the best models built with Vertex AI (Google), AutoML(H2O), and AutoTrain(HuggingFace). ArGTClass outperformed the VertexAi and AutoML models and was reasonably similar to the AutoTrain model.},
}
Markdown (Informal)
[Arabic Topic Classification in the Generative and AutoML Era](https://aclanthology.org/2023.arabicnlp-1.32/) (Albared et al., ArabicNLP 2023)
ACL