% Workshop paper record (KnowFM 2026 proceedings, Association for Computational Linguistics).
% Field values are brace-delimited; inner braces such as {K}now{FM} and {---} are
% part of the values and must stay balanced.
% NOTE(review): the url below points at a preview.aclanthology.org ingest branch --
% confirm whether a permanent URL should replace it once the volume is published.
% NOTE(review): `address` appears to hold the conference venue rather than the
% publisher's city -- verify against the convention used by the rest of the file.
@inproceedings{zhao-etal-2026-beyond-retrieval,
  title     = {Beyond Retrieval: Bi-Temporal State Arbitration for Longitudinal Healthcare Agents},
  author    = {Zhao, Jianing and
               Zhi, Xiaoquan and
               Yu, Xinqiang},
  editor    = {Chen, Canyu and
               Zhang, Yuji and
               Li, Zoey Sha and
               Wang, Zihan and
               Wang, Qineng and
               Su, Jinyan and
               Kargupta, Priyanka and
               Marjanovi{\'c}, Sara Vera and
               Pan, Jeff Z. and
               Bansal, Mohit and
               Augenstein, Isabelle and
               Han, Jiawei and
               Ji, Heng and
               Li, Manling},
  booktitle = {Proceedings of the 4th Workshop on Towards Knowledgeable Foundation Models ({K}now{FM} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.knowfm-1.10/},
  pages     = {129--137},
  isbn      = {979-8-89176-403-3},
  abstract  = {Longitudinal healthcare agents require persistent state tracking under temporal uncertainty. In domains like chronic disease management, patient states{---}medications, symptoms, and vital signs{---}evolve continuously over months. Existing memory architectures for Large Language Models (LLMs) are inherently retrieval-centric: they treat memory as a static repository of past interactions, failing to resolve conflicting or superseded information when queried for the current patient state. We propose a shift to state-centric memory. Our framework introduces (1) a bi-temporal state representation that decouples event time from ingestion time and tracks temporal validity windows, (2) an incremental state arbitration mechanism using four operators{---}SUPPORT, REFINE, SUPERSEDE, and BRANCH-CONFLICT{---}to handle evolving medical facts without destructive overwriting, and (3) a confidence-thresholded evidence escalation layer for robust, efficient memory access. Evaluated on a longitudinal diabetes management suite as a representative biomedical state tracking task, our method achieves a Unique-F1 of 0.85 and Conflict-F1 of 0.98, substantially improves upon long-context LLMs (0.38 / 0.89) and standard vector memory (0.30 / 0.60), demonstrating that agentic AI in longitudinal biomedical settings requires continuous, evidence-grounded arbitration rather than simple retrieval.}
}