% Conference paper from the LREC 2026 main track (ACL Anthology export).
% Typed as inproceedings with the venue name in booktitle: the export used the
% article type with the conference name in journal, and standard BibTeX styles
% do not print the editor list for articles.
% The export's volume = "main" is not carried over: it appears to be the
% Anthology track label (cf. "lrec-main" in the url), not a volume number.
% NOTE(review): url points at a preview/ingest host -- confirm the canonical
% address (and a DOI, if one is assigned) before relying on this entry.
@inproceedings{takenaka-yanaka-2026-seeing,
  title     = {Seeing the Other Side: Diagnostic Tasks for Viewpoint Reasoning in Vision{--}Language Models},
  author    = {Takenaka, Makoto and
               Yanaka, Hitomi},
  editor    = {Piperidis, Stelios and
               Bel, N{\'u}ria and
               van den Heuvel, Henk and
               Ide, Nancy and
               Krek, Simon and
               Toral, Antonio},
  booktitle = {International Conference on Language Resources and Evaluation},
  month     = may,
  year      = {2026},
  address   = {Palma de Mallorca, Spain},
  publisher = {ELRA Language Resource Association},
  url       = {https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.737/},
  pages     = {9386--9395},
  abstract  = {Humans can integrate multiple visual perspectives and infer how an object appears from unseen sides. This study investigates whether Large Vision Language Models (LVLMs) exhibit a comparable ability for reference-grounded spatial reasoning. We propose two diagnostic tasks: Opposite-Side Reasoning, which determines whether two images show the same object from opposite viewpoints, and Viewpoint Identification, which predicts the viewpoint of a target image using a reference image and its label. An additional condition, Viewpoint Identification (no-ref), removes reference information to reveal cases solvable without it, distinguishing genuine reasoning from bias-driven shortcuts. Our evaluation shows that both open and proprietary LVLMs fall far short of human performance. Even state-of-the-art proprietary LVLMs with relatively high accuracy retain many correct answers when reference information is removed, suggesting that their success often relies on linguistic or dataset-driven priors rather than genuine reference-based reasoning. These findings indicate that current LVLMs have not yet achieved consistent, reference-grounded spatial reasoning. Our datasets in this work will be released on the Hugging Face Hub to support future research on multimodal viewpoint reasoning and spatial understanding.},
}
Markdown (Informal)
[Seeing the Other Side: Diagnostic Tasks for Viewpoint Reasoning in Vision–Language Models](https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.737/) (Takenaka & Yanaka, LREC 2026)
ACL