@inproceedings{ding-jurgens-2021-hamiltondinggg,
    title     = {{HamiltonDinggg} at {SemEval}-2021 Task 5: Investigating Toxic Span Detection using {RoBERTa} Pre-training},
    author    = {Ding, Huiyang and
                 Jurgens, David},
    editor    = {Palmer, Alexis and
                 Schneider, Nathan and
                 Schluter, Natalie and
                 Emerson, Guy and
                 Herbelot, Aurelie and
                 Zhu, Xiaodan},
    booktitle = {Proceedings of the 15th International Workshop on Semantic Evaluation ({SemEval}-2021)},
    month     = aug,
    year      = {2021},
    address   = {Online},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2021.semeval-1.31/},
    doi       = {10.18653/v1/2021.semeval-1.31},
    pages     = {263--269},
    abstract  = {This paper presents our system submission to task 5: Toxic Spans Detection of the SemEval-2021 competition. The competition aims at detecting the spans that make a toxic span toxic. In this paper, we demonstrate our system for detecting toxic spans, which includes expanding the toxic training set with Local Interpretable Model-Agnostic Explanations (LIME), fine-tuning RoBERTa model for detection, and error analysis. We found that feeding the model with an expanded training set using Reddit comments of polarized-toxicity and labeling with LIME on top of logistic regression classification could help RoBERTa more accurately learn to recognize toxic spans. We achieved a span-level F1 score of 0.6715 on the testing phase. Our quantitative and qualitative results show that the predictions from our system could be a good supplement to the gold training set{'}s annotations.}
}
@comment{
Markdown (Informal)
[HamiltonDinggg at SemEval-2021 Task 5: Investigating Toxic Span Detection using RoBERTa Pre-training](https://aclanthology.org/2021.semeval-1.31/) (Ding & Jurgens, SemEval 2021)
ACL
}