@inproceedings{wong-etal-2025-vaquum,
title = "{VAQUUM}: Are Vague Quantifiers Grounded in Visual Data?",
author = "Wong, Hugh Mee and
Nouwen, Rick and
Gatt, Albert",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/transition-to-people-yaml/2025.findings-acl.619/",
doi = "10.18653/v1/2025.findings-acl.619",
pages = "11966--11982",
ISBN = "979-8-89176-256-5",
abstract = "Vague quantifiers such as ``a few'' and ``many'' are influenced by various contextual factors, including the number of objects present in a given context. In this work, we evaluate the extent to which vision-and-language models (VLMs) are compatible with humans when producing or judging the appropriateness of vague quantifiers in visual contexts. We release a novel dataset, VAQUUM, containing 20,300 human ratings on quantified statements across a total of 1089 images. Using this dataset, we compare human judgments and VLM predictions using three different evaluation methods. Our findings show that VLMs, like humans, are influenced by object counts in vague quantifier use. However, we find significant inconsistencies across models in different evaluation settings, suggesting that judging and producing vague quantifiers rely on two different processes. We release our dataset and code at https://github.com/hughmee/vaquum."
}
Markdown (Informal)
[VAQUUM: Are Vague Quantifiers Grounded in Visual Data?](https://preview.aclanthology.org/transition-to-people-yaml/2025.findings-acl.619/) (Wong et al., Findings 2025)