@inproceedings{nafis-etal-2023-towards,
title = "Towards Safer Communities: Detecting Aggression and Offensive Language in Code-Mixed Tweets to Combat Cyberbullying",
author = "Nafis, Nazia and
Kanojia, Diptesh and
Saini, Naveen and
Murthy, Rudra",
editor = {Chung, Yi-ling and
R{{\textbackslash}"ottger}, Paul and
Nozza, Debora and
Talat, Zeerak and
Mostafazadeh Davani, Aida},
booktitle = "The 7th Workshop on Online Abuse and Harms (WOAH)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.woah-1.3",
doi = "10.18653/v1/2023.woah-1.3",
pages = "29--41",
abstract = "Cyberbullying is a serious societal issue widespread on various channels and platforms, particularly social networking sites. Such platforms have proven to be exceptionally fertile grounds for such behavior. The dearth of high-quality training data for multilingual and low-resource scenarios, data that can accurately capture the nuances of social media conversations, often poses a roadblock to this task. This paper attempts to tackle cyberbullying, specifically its two most common manifestations - aggression and offensiveness. We present a novel, manually annotated dataset of a total of 10,000 English and Hindi-English code-mixed tweets, manually annotated for aggression detection and offensive language detection tasks. Our annotations are supported by inter-annotator agreement scores of 0.67 and 0.74 for the two tasks, indicating substantial agreement. We perform comprehensive fine-tuning of pre-trained language models (PTLMs) using this dataset to check its efficacy. Our challenging test sets show that the best models achieve macro F1-scores of 67.87 and 65.45 on the two tasks, respectively. Further, we perform cross-dataset transfer learning to benchmark our dataset against existing aggression and offensive language datasets. We also present a detailed quantitative and qualitative analysis of errors in prediction, and with this paper, we publicly release the novel dataset, code, and models.",
}
Markdown (Informal)
[Towards Safer Communities: Detecting Aggression and Offensive Language in Code-Mixed Tweets to Combat Cyberbullying](https://aclanthology.org/2023.woah-1.3) (Nafis et al., WOAH 2023)
ACL