% Workshop paper: Games and NLP workshop at LREC-COLING 2024 (ACL Anthology ID 2024.games-1.11).
% The url field uses the canonical Anthology address rather than a temporary preview-branch link.
@inproceedings{manna-etal-2024-riddle,
  title     = {Riddle Me This: Evaluating Large Language Models in Solving Word-Based Games},
  author    = {Manna, Raffaele and
               di Buono, Maria Pia and
               Monti, Johanna},
  editor    = {Madge, Chris and
               Chamberlain, Jon and
               Fort, Karen and
               Kruschwitz, Udo and
               Lukin, Stephanie},
  booktitle = {Proceedings of the 10th Workshop on Games and Natural Language Processing @ {LREC-COLING} 2024},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  url       = {https://aclanthology.org/2024.games-1.11/},
  pages     = {97--106},
  abstract  = {In this contribution, we examine the proficiency of Large Language Models (LLMs) in solving the linguistic game {\textquotedblleft}La Ghigliottina,{\textquotedblright} the final game of the popular Italian TV quiz show {\textquotedblleft}L'Eredit{\`a}{\textquotedblright}. This game is particularly challenging as it requires LLMs to engage in semantic inference reasoning for identifying the solutions of the game. Our experiment draws inspiration from Ghigliottin-AI, a task of EVALITA 2020, an evaluation campaign focusing on Natural Language Processing (NLP) and speech tools designed for the Italian language. To benchmark our experiment, we use the results of the most successful artificial player in this task, namely Il Mago della Ghigliottina. The paper describes the experimental setting and the results which show that LLMs perform poorly.},
}
Markdown (Informal)
[Riddle Me This: Evaluating Large Language Models in Solving Word-Based Games](https://aclanthology.org/2024.games-1.11/) (Manna et al., games 2024)
ACL