% SemEval-2021 Task 5 system paper (ACL Anthology ID 2021.semeval-1.118).
% The url field uses the canonical Anthology address, not a preview/staging build.
@inproceedings{suman-jain-2021-astartwice,
  title     = {{AStarTwice} at {SemEval}-2021 Task 5: Toxic Span Detection Using {RoBERTa}-{CRF}, Domain Specific Pre-Training and Self-Training},
  author    = {Suman, Thakur Ashutosh and
               Jain, Abhinav},
  editor    = {Palmer, Alexis and
               Schneider, Nathan and
               Schluter, Natalie and
               Emerson, Guy and
               Herbelot, Aurelie and
               Zhu, Xiaodan},
  booktitle = {Proceedings of the 15th International Workshop on Semantic Evaluation (SemEval-2021)},
  month     = aug,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.semeval-1.118/},
  doi       = {10.18653/v1/2021.semeval-1.118},
  pages     = {875--880},
  abstract  = {This paper describes our contribution to SemEval-2021 Task 5: Toxic Spans Detection. Our solution is built upon RoBERTa language model and Conditional Random Fields (CRF). We pre-trained RoBERTa on Civil Comments dataset, enabling it to create better contextual representation for this task. We also employed the semi-supervised learning technique of self-training, which allowed us to extend our training dataset. In addition to these, we also identified some pre-processing steps that significantly improved our F1 score. Our proposed system achieved a rank of 41 with an F1 score of 66.16\%.},
}
Markdown (Informal)
[AStarTwice at SemEval-2021 Task 5: Toxic Span Detection Using RoBERTa-CRF, Domain Specific Pre-Training and Self-Training](https://aclanthology.org/2021.semeval-1.118/) (Suman & Jain, SemEval 2021)
ACL