@inproceedings{liao-etal-2025-reflectool,
title = "{R}eflec{T}ool: Towards Reflection-Aware Tool-Augmented Clinical Agents",
author = "Liao, Yusheng and
Jiang, Shuyang and
Wang, Yanfeng and
Wang, Yu",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingestion-acl-25/2025.acl-long.663/",
pages = "13507--13531",
ISBN = "979-8-89176-251-0",
abstract = "Large Language Models (LLMs) have shown promising potential in the medical domain, assisting with tasks like clinical note generation and patient communication. However, current LLMs are limited to text-based communication, hindering their ability to interact with diverse forms of information in clinical environments. Despite clinical agents succeeding in diverse signal interaction, they are oriented to a single clinical scenario and hence fail for broader applications. To evaluate clinical agents holistically, we propose ClinicalAgent Bench (CAB), a comprehensive medical agent benchmark consisting of 18 tasks across five key realistic clinical dimensions. Building on this, we introduce ReflectTool, a novel framework that excels at utilizing domain-specific tools within two stages. The first optimization stage progressively enlarges a long-term memory by saving successful solving processes and tool-wise experience of agents in a tiny pre-defined training set. In the following inference stage, ReflectTool can search for supportive successful demonstrations from already built long-term memory to guide the tool selection strategy, and a verifier improves the tool usage according to the tool-wise experience with two verification methods{--}iterative refinement and candidate selection. Extensive experiments on CAB demonstrate that ReflectTool surpasses the pure LLMs with more than 10 points and the well-established agent-based methods with 3 points, highlighting its adaptability and effectiveness in solving complex clinical tasks. Our code and datasets are available at https://github.com/BlueZeros/ReflecTool."
}
Markdown (Informal)
[ReflecTool: Towards Reflection-Aware Tool-Augmented Clinical Agents](https://preview.aclanthology.org/ingestion-acl-25/2025.acl-long.663/) (Liao et al., ACL 2025)
ACL