% W-NUT 2020 Shared Task 2 system paper (ACL Anthology ID 2020.wnut-1.64).
% The url field uses the canonical aclanthology.org address; the DOI is the stable identifier.
@inproceedings{chauhan-2020-neu,
  title     = {{NEU} at {WNUT}-2020 Task 2: Data Augmentation To Tell {BERT} That Death Is Not Necessarily Informative},
  author    = {Chauhan, Kumud},
  editor    = {Xu, Wei and
               Ritter, Alan and
               Baldwin, Tim and
               Rahimi, Afshin},
  booktitle = {Proceedings of the Sixth Workshop on Noisy User-generated Text ({W-NUT} 2020)},
  month     = nov,
  year      = {2020},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2020.wnut-1.64/},
  doi       = {10.18653/v1/2020.wnut-1.64},
  pages     = {440--443},
  abstract  = {Millions of people around the world are sharing COVID-19 related information on social media platforms. Since not all the information shared on the social media is useful, a machine learning system to identify informative posts can help users in finding relevant information. In this paper, we present a BERT classifier system for W-NUT2020 Shared Task 2: Identification of Informative COVID-19 English Tweets. Further, we show that BERT exploits some easy signals to identify informative tweets, and adding simple patterns to uninformative tweets drastically degrades BERT performance. In particular, simply adding {\textquotedblleft}10 deaths{\textquotedblright} to tweets in dev set, reduces BERT F1-score from 92.63 to 7.28. We also propose a simple data augmentation technique that helps in improving the robustness and generalization ability of the BERT classifier.},
}
Markdown (Informal)
[NEU at WNUT-2020 Task 2: Data Augmentation To Tell BERT That Death Is Not Necessarily Informative](https://aclanthology.org/2020.wnut-1.64/) (Chauhan, WNUT 2020)
ACL