% Conference-paper record for the MI-CXR benchmark paper (Findings of ACL 2026).
% NOTE(review): the url field points at a preview.aclanthology.org ingest path;
%   confirm whether a permanent URL or DOI should replace it before release.
% NOTE(review): the address field appears to hold the conference location rather
%   than the publisher's city; verify this matches the convention of the target style.
% The abstract is stored as plain text: Markdown bold markers from the web export
%   were removed because they would otherwise be typeset as literal asterisks.
@inproceedings{cho-etal-2026-mi,
  title     = {{MI-CXR}: A Benchmark for Longitudinal Reasoning over Multi-Interval Chest {X-rays}},
  author    = {Cho, Sunghwan Steve and
               Han, Yunseok and
               Do, Jaeyoung},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association for Computational Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1512/},
  pages     = {30241--30273},
  isbn      = {979-8-89176-395-1},
  abstract  = {Longitudinal chest X-ray (CXR) interpretation requires reasoning over disease evolution across multiple patient visits, yet most existing medical VQA benchmarks focus on single images or short-horizon image pairs. We introduce MI-CXR, a benchmark for standardized evaluation of Multi-Interval longitudinal reasoning over multi-visit CXR sequences, without requiring free-form report generation or additional clinical context. MI-CXR comprises five-way multiple-choice questions over five-visit patient timelines and instantiates three complementary task families: Temporal Event Localization, Interval-wise Change Reasoning, and Global Trajectory Summarization, which assess clinically grounded visual reasoning over time. Evaluating 14 state-of-the-art vision{--}language models (VLMs) shows low overall performance (29.3{\%} accuracy), only modestly above random guessing. Using stage-wise diagnostic probing, we find that models often produce locally plausible interval descriptions but fail to enforce temporal constraints or compose evidence into globally consistent decisions over the full timeline. These findings reveal key limitations of current VLMs and establish MI-CXR as a principled benchmark for longitudinal medical reasoning. The benchmark is available at: https://github.com/AIDASLab/MI-CXR},
}
Markdown (Informal)
[MI-CXR: A Benchmark for Longitudinal Reasoning over Multi-Interval Chest X-rays](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1512/) (Cho et al., Findings 2026)
ACL