@inproceedings{hussein-etal-2021-damascusteam,
title = "{D}amascus{T}eam at {NLP}4{IF}2021: Fighting the {A}rabic {COVID}-19 Infodemic on {T}witter Using {A}ra{BERT}",
author = "Hussein, Ahmad and
Ghneim, Nada and
Joukhadar, Ammar",
editor = "Feldman, Anna and
Da San Martino, Giovanni and
Leberknight, Chris and
Nakov, Preslav",
booktitle = "Proceedings of the Fourth Workshop on NLP for Internet Freedom: Censorship, Disinformation, and Propaganda",
month = jun,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2021.nlp4if-1.13/",
doi = "10.18653/v1/2021.nlp4if-1.13",
pages = "93--98",
    abstract = "The objective of this work was to introduce an effective approach, based on the AraBERT language model, for fighting the COVID-19 infodemic on Twitter. It was arranged as a two-step pipeline: the first step applied a series of pre-processing procedures to transform Twitter jargon, including emojis and emoticons, into plain text, and the second step fine-tuned a version of AraBERT, pre-trained on plain text, to classify the tweets with respect to their label. The use of language models pre-trained on plain text rather than on tweets was motivated by two critical issues highlighted in the scientific literature: (1) pre-trained language models are widely available in many languages, avoiding the time-consuming and resource-intensive training of models on tweets from scratch and allowing one to focus only on fine-tuning; (2) available plain-text corpora are larger than tweet-only ones, allowing for better performance."
}
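The two-step pipeline described in the abstract can be illustrated with a short, hypothetical sketch: a pre-processing step that converts emojis, emoticons, and other Twitter jargon into plain text, followed by classification with a plain-text AraBERT checkpoint. The checkpoint name (`aubmindlab/bert-base-arabertv02`), the `emoji` package, and the binary label head are assumptions for illustration only; the authors' own code and label scheme are not reproduced here.

```python
# Hypothetical sketch of the two-step pipeline described in the abstract.
# Assumptions (not from the paper): the `emoji` package for demojizing,
# the `aubmindlab/bert-base-arabertv02` plain-text AraBERT checkpoint,
# and a binary classification head (num_labels=2).
import re

import emoji  # pip install emoji
from transformers import AutoModelForSequenceClassification, AutoTokenizer

MODEL_NAME = "aubmindlab/bert-base-arabertv02"  # assumed AraBERT variant

def preprocess(tweet: str) -> str:
    """Step 1: transform Twitter jargon into plain text."""
    text = emoji.demojize(tweet)                     # emojis -> ':face_with_mask:'
    text = text.replace(":", " ").replace("_", " ")  # flatten emoji aliases
    text = re.sub(r"https?://\S+", " ", text)        # strip URLs
    text = re.sub(r"[@#]\w+", " ", text)             # strip mentions/hashtags
    return re.sub(r"\s+", " ", text).strip()

# Step 2: classify with AraBERT pre-trained on plain text (actual
# fine-tuning would run a standard training loop over the task data).
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)

inputs = tokenizer(preprocess("البقاء في المنزل 😷 #COVID19 https://t.co/x"),
                   return_tensors="pt", truncation=True, max_length=128)
predicted_label = model(**inputs).logits.argmax(dim=-1).item()
```

The split mirrors the paper's stated motivation: keeping the model a stock plain-text AraBERT means only the pre-processing step has to absorb Twitter-specific noise.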
Markdown (Informal)
[DamascusTeam at NLP4IF2021: Fighting the Arabic COVID-19 Infodemic on Twitter Using AraBERT](https://aclanthology.org/2021.nlp4if-1.13/) (Hussein et al., NLP4IF 2021)