% Frohberg & Binder, LREC 2022: the CRASS counterfactual-reasoning data set and benchmark.
% The url field holds the permanent ACL Anthology address rather than a preview-branch build.
@inproceedings{frohberg-binder-2022-crass,
  title     = {{CRASS}: A Novel Data Set and Benchmark to Test Counterfactual Reasoning of Large Language Models},
  author    = {Frohberg, J{\"o}rg and
               Binder, Frank},
  editor    = {Calzolari, Nicoletta and
               B{\'e}chet, Fr{\'e}d{\'e}ric and
               Blache, Philippe and
               Choukri, Khalid and
               Cieri, Christopher and
               Declerck, Thierry and
               Goggi, Sara and
               Isahara, Hitoshi and
               Maegaard, Bente and
               Mariani, Joseph and
               Mazo, H{\'e}l{\`e}ne and
               Odijk, Jan and
               Piperidis, Stelios},
  booktitle = {Proceedings of the Thirteenth Language Resources and Evaluation Conference},
  month     = jun,
  year      = {2022},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  url       = {https://aclanthology.org/2022.lrec-1.229/},
  pages     = {2126--2140},
  abstract  = {We introduce the CRASS (counterfactual reasoning assessment) data set and benchmark utilizing questionized counterfactual conditionals as a novel and powerful tool to evaluate large language models. We present the data set design and benchmark. We test six state-of-the-art models against our benchmark. Our results show that it poses a valid challenge for these models and opens up considerable room for their improvement.},
}
Markdown (Informal)
[CRASS: A Novel Data Set and Benchmark to Test Counterfactual Reasoning of Large Language Models](https://aclanthology.org/2022.lrec-1.229/) (Frohberg & Binder, LREC 2022)
ACL