% Drinkall, Zohren, Pierrehumbert: NAACL 2022 main-conference paper on
% forecasting COVID-19 caseloads from clustered Reddit sentence embeddings.
% The url field uses the canonical ACL Anthology address (same Anthology ID
% as the DOI) instead of a temporary preview-branch deployment.
@inproceedings{drinkall-etal-2022-forecasting,
  title     = {Forecasting {COVID}-19 Caseloads Using Unsupervised Embedding Clusters of Social Media Posts},
  author    = {Drinkall, Felix and
               Zohren, Stefan and
               Pierrehumbert, Janet},
  editor    = {Carpuat, Marine and
               de Marneffe, Marie-Catherine and
               Meza Ruiz, Ivan Vladimir},
  booktitle = {Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  month     = jul,
  year      = {2022},
  address   = {Seattle, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.naacl-main.105/},
  doi       = {10.18653/v1/2022.naacl-main.105},
  pages     = {1471--1484},
  abstract  = {We present a novel approach incorporating transformer-based language models into infectious disease modelling. Text-derived features are quantified by tracking high-density clusters of sentence-level representations of Reddit posts within specific US states' COVID-19 subreddits. We benchmark these clustered embedding features against features extracted from other high-quality datasets. In a threshold-classification task, we show that they outperform all other feature types at predicting upward trend signals, a significant result for infectious disease modelling in areas where epidemiological data is unreliable. Subsequently, in a time-series forecasting task, we fully utilise the predictive power of the caseload and compare the relative strengths of using different supplementary datasets as covariate feature sets in a transformer-based time-series model.},
}
Markdown (Informal)
[Forecasting COVID-19 Caseloads Using Unsupervised Embedding Clusters of Social Media Posts](https://aclanthology.org/2022.naacl-main.105/) (Drinkall et al., NAACL 2022)
ACL