% Findings of ACL 2026 paper by Roy, Wang, and MacLellan on concreteness
% sensitivity in vision-language models versus text-only language models.
% NOTE(review): the url field points at a preview.aclanthology.org ingest
% path -- confirm whether the canonical Anthology URL should replace it.
@inproceedings{roy-etal-2026-grounded,
  author    = {Roy, Aryan and
               Wang, Zekun and
               MacLellan, Christopher J.},
  title     = {Grounded Concreteness: Human-Like Concreteness Sensitivity in Vision{--}Language Models},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026},
  year      = {2026},
  month     = jul,
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  pages     = {41934--41950},
  isbn      = {979-8-89176-395-1},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.2081/},
  abstract  = {Do vision-language models (VLMs) develop more human-like sensitivity to linguistic concreteness than text-only large language models (LLMs) when both are evaluated with text-only prompts? We study this question with a controlled comparison between matched Llama text backbones and their Llama Vision counterparts across multiple model scales, treating multimodal pretraining as an ablation on perceptual grounding rather than access to images at inference. We measure concreteness effects at three complementary levels: (i) output behavior, by relating question-level concreteness to QA accuracy; (ii) embedding geometry, by testing whether representations organize along a concreteness axis; and (iii) attention dynamics, by quantifying context reliance via attention-entropy measures. In addition, we elicit token-level concreteness ratings from models and evaluate alignment to human norm distributions, testing whether multimodal training yields more human-consistent judgments. Across benchmarks and scales, VLMs show larger gains on more concrete inputs, exhibit clearer concreteness-structured representations, produce ratings that better match human norms, and display systematically different attention patterns consistent with increased grounding.},
}
Markdown (Informal)
[Grounded Concreteness: Human-Like Concreteness Sensitivity in Vision–Language Models](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.2081/) (Roy et al., Findings 2026)
ACL