% Shared-task system paper from the SMM4H 2021 workshop proceedings (Task 5).
% NOTE(review): last author is entered as family name "Kobayashi", given name
% "Ichiro" (the export had the two parts swapped) -- confirm against the paper.
% The url field uses the canonical ACL Anthology address, not a preview branch.
@inproceedings{luo-etal-2021-ochadai,
  title     = {{OCHADAI} at {SMM4H}-2021 Task 5: Classifying self-reporting tweets on potential cases of {COVID}-19 by ensembling pre-trained language models},
  author    = {Luo, Ying and
               Pereira, Lis and
               Kobayashi, Ichiro},
  editor    = {Magge, Arjun and
               Klein, Ari and
               Miranda-Escalada, Antonio and
               Al-garadi, Mohammed Ali and
               Alimova, Ilseyar and
               Miftahutdinov, Zulfat and
               Farre-Maduell, Eulalia and
               Lopez, Salvador Lima and
               Flores, Ivan and
               O'Connor, Karen and
               Weissenbacher, Davy and
               Tutubalina, Elena and
               Sarker, Abeed and
               Banda, Juan M and
               Krallinger, Martin and
               Gonzalez-Hernandez, Graciela},
  booktitle = {Proceedings of the Sixth Social Media Mining for Health ({\#}SMM4H) Workshop and Shared Task},
  month     = jun,
  year      = {2021},
  address   = {Mexico City, Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.smm4h-1.25/},
  doi       = {10.18653/v1/2021.smm4h-1.25},
  pages     = {123--125},
  abstract  = {Since the outbreak of coronavirus at the end of 2019, there have been numerous studies on coronavirus in the NLP arena. Meanwhile, Twitter has been a valuable source of news and a public medium for the conveyance of information and personal expression. This paper describes the system developed by the Ochadai team for the Social Media Mining for Health Applications (SMM4H) 2021 Task 5, which aims to automatically distinguish English tweets that self-report potential cases of COVID-19 from those that do not. We proposed a model ensemble that leverages pre-trained representations from COVID-Twitter-BERT (M{\"u}ller et al., 2020), RoBERTa (Liu et al., 2019), and Twitter-RoBERTa (Glazkova et al., 2021). Our model obtained F1-scores of 76{\%} on the test set in the evaluation phase, and 77.5{\%} in the post-evaluation phase.},
}