@inproceedings{zellers-etal-2018-swag,
title = "{SWAG}: A Large-Scale Adversarial Dataset for Grounded Commonsense Inference",
author = "Zellers, Rowan and
Bisk, Yonatan and
Schwartz, Roy and
Choi, Yejin",
editor = "Riloff, Ellen and
Chiang, David and
Hockenmaier, Julia and
Tsujii, Jun{'}ichi",
booktitle = "Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing",
month = oct # "-" # nov,
year = "2018",
address = "Brussels, Belgium",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/add-emnlp-2024-awards/D18-1009/",
doi = "10.18653/v1/D18-1009",
pages = "93--104",
abstract = "Given a partial description like {\textquotedblleft}she opened the hood of the car,{\textquotedblright} humans can reason about the situation and anticipate what might come next ({\textquotedblright}then, she examined the engine{\textquotedblright}). In this paper, we introduce the task of grounded commonsense inference, unifying natural language inference and commonsense reasoning. We present SWAG, a new dataset with 113k multiple choice questions about a rich spectrum of grounded situations. To address the recurring challenges of the annotation artifacts and human biases found in many existing datasets, we propose Adversarial Filtering (AF), a novel procedure that constructs a de-biased dataset by iteratively training an ensemble of stylistic classifiers, and using them to filter the data. To account for the aggressive adversarial filtering, we use state-of-the-art language models to massively oversample a diverse set of potential counterfactuals. Empirical results demonstrate that while humans can solve the resulting inference problems with high accuracy (88{\%}), various competitive models struggle on our task. We provide comprehensive analysis that indicates significant opportunities for future research."
}