% Conference paper from the *SEM 2024 proceedings (ACL Anthology ID 2024.starsem-1.25).
% The url field uses the permanent Anthology address rather than a preview-branch build,
% and the proper noun in the title is brace-protected so sentence-casing styles keep it capitalised.
@inproceedings{ranaldi-zanzotto-2024-hans,
  author    = {Ranaldi, Leonardo and
               Zanzotto, Fabio},
  title     = {{HANS}, are you clever? {Clever} {Hans} Effect Analysis of Neural Systems},
  editor    = {Bollegala, Danushka and
               Shwartz, Vered},
  booktitle = {Proceedings of the 13th Joint Conference on Lexical and Computational Semantics (*SEM 2024)},
  month     = jun,
  year      = {2024},
  address   = {Mexico City, Mexico},
  publisher = {Association for Computational Linguistics},
  pages     = {314--325},
  doi       = {10.18653/v1/2024.starsem-1.25},
  url       = {https://aclanthology.org/2024.starsem-1.25/},
  abstract  = {Large Language Models (LLMs) have been exhibiting outstanding abilities to reason around cognitive states, intentions, and reactions of all people involved, letting humans guide and comprehend day-to-day social interactions effectively. In fact, several multiple-choice questions (MCQ) benchmarks have been proposed to construct solid assessments of the models' abilities. However, earlier works demonstrate the presence of inherent {\textquotedblleft}order bias{\textquotedblright} in LLMs, posing challenges to the appropriate evaluation. In this paper, we investigate LLMs' resilience abilities through a series of probing tests using four MCQ benchmarks. Introducing adversarial examples, we show a significant performance gap, mainly when varying the order of the choices, which reveals a selection bias and brings into discussion reasoning abilities. Following a correlation between first positions and model choices due to positional bias, we hypothesized the presence of structural heuristics in the decision-making process of the LLMs, strengthened by including significant examples in few-shot scenarios. Finally, by using the Chain-of-Thought (CoT) technique, we elicit the model to reason and mitigate the bias by obtaining more robust models.},
}
Markdown (Informal)
[HANS, are you clever? Clever Hans Effect Analysis of Neural Systems](https://aclanthology.org/2024.starsem-1.25/) (Ranaldi & Zanzotto, *SEM 2024)
ACL