% GITA (Graded Italian Annotated dataset) paper, LREC-COLING 2024 main proceedings.
% The url field holds the canonical ACL Anthology address rather than a preview-branch link.
@inproceedings{pensa-etal-2024-multi,
  title     = {A Multi-layered Approach to Physical Commonsense Understanding: Creation and Evaluation of an {Italian} Dataset},
  author    = {Pensa, Giulia and
               Altuna, Bego{\~n}a and
               Gonzalez-Dios, Itziar},
  editor    = {Calzolari, Nicoletta and
               Kan, Min-Yen and
               Hoste, Veronique and
               Lenci, Alessandro and
               Sakti, Sakriani and
               Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  url       = {https://aclanthology.org/2024.lrec-main.74/},
  pages     = {819--831},
  abstract  = {In this paper, we explore physical commonsense reasoning of large language models (LLMs) and propose a specific methodology to evaluate low-level understanding of the physical world. Specifically, the goal is to create a test set to analyze physical commonsense reasoning in large language models for Italian and focus on a trustworthy analysis of the results. To that end, we present a tiered Italian dataset, called Graded Italian Annotated dataset (GITA), written and thoroughly annotated by a professional linguist, which allows us to concentrate on three different levels of commonsense understanding. Moreover, we create a semi-automated system to complete the accurate annotation of the dataset. We also validate our dataset by carrying out three tasks with a multilingual model (XLM-RoBERTa) and propose a qualitative analysis of the results. We found out that, although the model may perform at high-level classification tasks, its reasoning is inconsistent and unverifiable, since it does not capture intermediate evidence.},
}
Markdown (Informal)
[A Multi-layered Approach to Physical Commonsense Understanding: Creation and Evaluation of an Italian Dataset](https://aclanthology.org/2024.lrec-main.74/) (Pensa et al., LREC-COLING 2024)
ACL