% ACL 2025 long paper (Anthology ID 2025.acl-long.1337).
% The url field uses the canonical Anthology address rather than the ingestion preview link.
@inproceedings{zhou-etal-2025-focus,
  title     = {{FOCUS}: Evaluating Pre-trained Vision-Language Models on Underspecification Reasoning},
  author    = {Zhou, Kankan and
               Lai, Eason and
               Mouratidis, Kyriakos and
               Jiang, Jing},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.acl-long.1337/},
  pages     = {27565--27584},
  isbn      = {979-8-89176-251-0},
  abstract  = {Humans possess a remarkable ability to interpret underspecified ambiguous statements by inferring their meanings from contexts such as visual inputs. This ability, however, may not be as developed in recent pre-trained vision-language models (VLMs). In this paper, we introduce a novel probing dataset called FOCUS to evaluate whether state-of-the-art VLMs have this ability. FOCUS consists of underspecified sentences paired with image contexts and carefully designed probing questions. Our experiments reveal that VLMs still fall short in handling underspecification even when visual inputs that can help resolve the ambiguities are available. To further support research in underspecification, FOCUS will be released for public use. We hope this dataset will inspire further research on the reasoning and contextual understanding capabilities of VLMs.}
}
Markdown (Informal)
[FOCUS: Evaluating Pre-trained Vision-Language Models on Underspecification Reasoning](https://aclanthology.org/2025.acl-long.1337/) (Zhou et al., ACL 2025)
ACL