@inproceedings{wang-kovashka-2025-probing,
  title     = {Probing Logical Reasoning of {MLLM}s in Scientific Diagrams},
  author    = {Wang, Yufei and
               Kovashka, Adriana},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.emnlp-main.542/},
  pages     = {10717--10729},
  isbn      = {979-8-89176-332-6},
  abstract  = {We examine how multimodal large language models (MLLMs) perform logical inference grounded in visual information. We first construct a dataset of food web/chain images, along with questions that follow seven structured templates with progressively more complex reasoning involved. We show that complex reasoning about entities in the images remains challenging (even with elaborate prompts) and that visual information is underutilized.},
}
@comment{
  Informal Markdown citation pasted from the ACL Anthology page (kept for
  reference; BibTeX ignores @comment blocks):
  [Probing Logical Reasoning of MLLMs in Scientific Diagrams](https://aclanthology.org/2025.emnlp-main.542/) (Wang & Kovashka, EMNLP 2025)
  ACL
}