@inproceedings{wang-etal-2026-one,
    title = "This One or That One? A Study on Accessibility via Demonstratives with Multimodal Large Language Models",
    author = "Wang, Yu and
      Chersoni, Emmanuele and
      Huang, Chu-Ren",
    editor = "Piperidis, Stelios and
      Bel, N{\'u}ria and
      van den Heuvel, Henk and
      Ide, Nancy and
      Krek, Simon and
      Toral, Antonio",
    booktitle = "Proceedings of the International Conference on Language Resources and Evaluation",
    month = may,
    year = "2026",
    address = "Palma de Mallorca, Spain",
    publisher = "ELRA Language Resource Association",
    url = "https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.763/",
    pages = "9722--9732",
    abstract = "Accessibility refers to the ease with which a speaker can acquire an object, and it is often conveyed through demonstrative pronouns like ``this'' and ``that'', indicating proximal or distal objects. Most importantly, accessibility also involves perspective shifts, which are essential for understanding differing viewpoints. In this case study, we adopt an evaluation dataset with a pair-to-pair question structure for referent identification based on demonstratives. Our experiments show that current Multimodal Large Language Models (MLLMs) exhibit markedly low performance in accessibility tasks requiring perspective shifts, with accuracies around 2.33{\%} (Chinese) and 1.83{\%} (English). Moreover, models struggle with qualitative characteristics and frame-based reasoning, often failing to apply implicit contextual rules unless explicitly encoded in training data. These limitations suggest that MLLMs rely heavily on surface co-occurrence instead of truly grounded, embodied experience. Our evaluation framework provides a robust lens revealing that MLLMs lack both self-other distinction{---}an essential aspect of self-awareness{---}and the embodied cognition necessary for reliable performance in practical embodied AI applications."
}
Markdown (Informal)
[This One or That One? A Study on Accessibility via Demonstratives with Multimodal Large Language Models](https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.763/) (Wang et al., LREC 2026)
ACL