% Findings of ACL: EMNLP 2024 paper (ACL Anthology ID 2024.findings-emnlp.763).
% The url field is the canonical ACL Anthology landing page; the doi is the
% preferred persistent identifier.
@inproceedings{ghaffari-krishnaswamy-2024-large,
  title     = {Large Language Models Are Challenged by Habitat-Centered Reasoning},
  author    = {Ghaffari, Sadaf and
               Krishnaswamy, Nikhil},
  editor    = {Al-Onaizan, Yaser and
               Bansal, Mohit and
               Chen, Yun-Nung},
  booktitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2024},
  month     = nov,
  year      = {2024},
  address   = {Miami, Florida, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.findings-emnlp.763/},
  doi       = {10.18653/v1/2024.findings-emnlp.763},
  pages     = {13047--13059},
  abstract  = {In this paper we perform a novel in-depth evaluation of text-only and multimodal LLMs' abilities to reason about object *habitats* or conditions on how objects are situated in their environments that affect the types of behaviors (or *affordances*) that can be enacted upon them. We present a novel curated multimodal dataset of questions about object habitats and affordances, which are formally grounded in the underlying lexical semantics literature, with multiple images from various sources that depict the scenario described in the question. We evaluate 16 text-only and multimodal LLMs on this challenging data. Our findings indicate that while certain LLMs can perform reasonably well on reasoning about affordances, there appears to be a consistent low upper bound on habitat-centered reasoning performance. We discuss how the formal semantics of habitats in fact predicts this behavior and propose this as a challenge to the community.},
}
Markdown (Informal)
[Large Language Models Are Challenged by Habitat-Centered Reasoning](https://aclanthology.org/2024.findings-emnlp.763/) (Ghaffari & Krishnaswamy, Findings 2024)
ACL