@inproceedings{jahan-etal-2022-banglahatebert,
    title     = {{BanglaHateBERT}: {BERT} for Abusive Language Detection in {Bengali}},
    author    = {Jahan, Md Saroar and
                 Haque, Mainul and
                 Arhab, Nabil and
                 Oussalah, Mourad},
    editor    = {Monti, Johanna and
                 Basile, Valerio and
                 Di Buono, Maria Pia and
                 Manna, Raffaele and
                 Pascucci, Antonio and
                 Tonelli, Sara},
    booktitle = {Proceedings of the Second International Workshop on Resources and Techniques for User Information in Abusive Language Analysis},
    month     = jun,
    year      = {2022},
    address   = {Marseille, France},
    publisher = {European Language Resources Association},
    url       = {https://aclanthology.org/2022.restup-1.2},
    pages     = {8--15},
    abstract  = {This paper introduces BanglaHateBERT, a retrained BERT model for abusive language detection in Bengali. The model was trained with a large-scale Bengali offensive, abusive, and hateful corpus that we have collected from different sources and made available to the public. Furthermore, we have collected and manually annotated 15K Bengali hate speech balanced dataset and made it publicly available for the research community. We used existing pre-trained BanglaBERT model and retrained it with 1.5 million offensive posts. We presented the results of a detailed comparison between generic pre-trained language model and retrained with the abuse-inclined version. In all datasets, BanglaHateBERT outperformed the corresponding available BERT model.},
}
Markdown (Informal)
[BanglaHateBERT: BERT for Abusive Language Detection in Bengali](https://aclanthology.org/2022.restup-1.2) (Jahan et al., ResTUP 2022)
ACL
- Md Saroar Jahan, Mainul Haque, Nabil Arhab, and Mourad Oussalah. 2022. BanglaHateBERT: BERT for Abusive Language Detection in Bengali. In Proceedings of the Second International Workshop on Resources and Techniques for User Information in Abusive Language Analysis, pages 8–15, Marseille, France. European Language Resources Association.