@comment{Findings of EMNLP 2025 paper introducing the VIVA+ benchmark. The url field holds the canonical ACL Anthology address, not a branch-preview host.}
@inproceedings{hu-etal-2025-viva,
  title     = {{VIVA}+: Human-Centered Situational Decision-Making},
  author    = {Hu, Zhe and
               Ren, Yixiao and
               Liu, Guanzhong and
               Li, Jing and
               Yin, Yu},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2025},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-emnlp.944/},
  doi       = {10.18653/v1/2025.findings-emnlp.944},
  pages     = {17420--17437},
  isbn      = {979-8-89176-335-7},
  abstract  = {Multimodal Large Language Models (MLLMs) show promising results for embodied agents in operating meaningfully in complex, human-centered environments. Yet, evaluating their capacity for nuanced, human-like reasoning and decision-making remains challenging. In this work, we introduce VIVA+, a cognitively grounded benchmark for evaluating the reasoning and decision-making of MLLMs in human-centered situations. VIVA+ consists of 1,317 real-world situations paired with 6,373 multiple-choice questions, targeting three core abilities for decision-making: (1) Foundational Situation Comprehension, (2) Context-Driven Action Justification, and (3) Reflective Reasoning. Together, these dimensions provide a systematic framework for assessing a model{'}s ability to perceive, reason, and act in socially meaningful ways. We evaluate the latest commercial and open-source models on VIVA+, where we reveal distinct performance patterns and highlight significant challenges. We further explore targeted training and multi-step reasoning strategies, which yield consistent performance improvements. Finally, our in-depth analysis highlights current model limitations and provides actionable insights for advancing MLLMs toward more robust, context-aware, and socially adept decision-making in real-world settings.},
}

Markdown (Informal)
[VIVA+: Human-Centered Situational Decision-Making](https://aclanthology.org/2025.findings-emnlp.944/) (Hu et al., Findings 2025)
ACL
- Zhe Hu, Yixiao Ren, Guanzhong Liu, Jing Li, and Yu Yin. 2025. VIVA+: Human-Centered Situational Decision-Making. In Findings of the Association for Computational Linguistics: EMNLP 2025, pages 17420–17437, Suzhou, China. Association for Computational Linguistics.