@comment{
  SemEval-2021 Task 5 (Toxic Spans Detection) system description paper for
  the BERToxic system by team Lone Pine, as exported from the ACL Anthology.
  Case-sensitive names in the title are protected as whole braced words so
  that sentence-casing bibliography styles keep their capitalisation.
  NOTE(review): the url field points at a preview.aclanthology.org host
  under a fix-sig-urls path, which looks like a staging preview link;
  confirm the canonical Anthology URL before publishing. The doi field is
  the stable identifier.
}
@inproceedings{khan-etal-2021-lone,
  author    = {Khan, Yakoob and
               Ma, Weicheng and
               Vosoughi, Soroush},
  title     = {{Lone Pine} at {SemEval}-2021 Task 5: Fine-Grained Detection of Hate Speech Using {BERToxic}},
  editor    = {Palmer, Alexis and
               Schneider, Nathan and
               Schluter, Natalie and
               Emerson, Guy and
               Herbelot, Aurelie and
               Zhu, Xiaodan},
  booktitle = {Proceedings of the 15th International Workshop on Semantic Evaluation (SemEval-2021)},
  month     = aug,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  pages     = {967--973},
  doi       = {10.18653/v1/2021.semeval-1.132},
  url       = {https://preview.aclanthology.org/fix-sig-urls/2021.semeval-1.132/},
  abstract  = {This paper describes our approach to the Toxic Spans Detection problem (SemEval-2021 Task 5). We propose BERToxic, a system that fine-tunes a pre-trained BERT model to locate toxic text spans in a given text and utilizes additional post-processing steps to refine the boundaries. The post-processing steps involve (1) labeling character offsets between consecutive toxic tokens as toxic and (2) assigning a toxic label to words that have at least one token labeled as toxic. Through experiments, we show that these two post-processing steps improve the performance of our model by 4.16{\%} on the test set. We also studied the effects of data augmentation and ensemble modeling strategies on our system. Our system significantly outperformed the provided baseline and achieved an F1-score of 0.683, placing Lone Pine in the 17th place out of 91 teams in the competition. Our code is made available at \url{https://github.com/Yakoob-Khan/Toxic-Spans-Detection}},
}
Markdown (Informal)
[Lone Pine at SemEval-2021 Task 5: Fine-Grained Detection of Hate Speech Using BERToxic](https://preview.aclanthology.org/fix-sig-urls/2021.semeval-1.132/) (Khan et al., SemEval 2021)
ACL