% Findings of ACL 2025 paper. The url field uses the canonical ACL Anthology
% landing page (not the temporary ingestion-preview host), and the abstract is
% stored as plain text without Markdown emphasis markers.
@inproceedings{zhang-etal-2025-libra,
  title     = {Libra: Leveraging Temporal Images for Biomedical Radiology Analysis},
  author    = {Zhang, Xi and
               Meng, Zaiqiao and
               Lever, Jake and
               Ho, Edmond S. L.},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2025},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-acl.888/},
  pages     = {17275--17303},
  isbn      = {979-8-89176-256-5},
  abstract  = {Radiology report generation (RRG) requires advanced medical image analysis, effective temporal reasoning, and accurate text generation. While multimodal large language models (MLLMs) align with pre-trained vision encoders to enhance visual-language understanding, most existing methods rely on single-image analysis or rule-based heuristics to process multiple images, failing to fully leverage temporal information in multi-modal medical datasets. In this paper, we introduce Libra, a temporal-aware MLLM tailored for chest X-ray report generation. Libra combines a radiology-specific image encoder with a novel Temporal Alignment Connector (TAC), designed to accurately capture and integrate temporal differences between paired current and prior images. Extensive experiments on the MIMIC-CXR dataset demonstrate that Libra establishes a new state-of-the-art benchmark among similarly scaled MLLMs, setting new standards in both clinical relevance and lexical accuracy. All source code and data are publicly available at: https://github.com/X-iZhang/Libra.},
}
Markdown (Informal)
[Libra: Leveraging Temporal Images for Biomedical Radiology Analysis](https://aclanthology.org/2025.findings-acl.888/) (Zhang et al., Findings 2025)
ACL