% Workshop paper in the BioNLP 2024 proceedings (ACL Anthology ID 2024.bionlp-1.54).
% Note: `address` holds the conference venue, following the ACL Anthology export convention.
@inproceedings{zhang-etal-2024-gla,
  title     = {{Gla-AI4BioMed} at {RRG24}: Visual Instruction-tuned Adaptation for Radiology Report Generation},
  author    = {Zhang, Xi and
               Meng, Zaiqiao and
               Lever, Jake and
               Ho, Edmond S. L.},
  editor    = {Demner-Fushman, Dina and
               Ananiadou, Sophia and
               Miwa, Makoto and
               Roberts, Kirk and
               Tsujii, Junichi},
  booktitle = {Proceedings of the 23rd Workshop on Biomedical Natural Language Processing},
  month     = aug,
  year      = {2024},
  address   = {Bangkok, Thailand},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.bionlp-1.54/},
  doi       = {10.18653/v1/2024.bionlp-1.54},
  pages     = {624--634},
  abstract  = {This paper introduces a radiology-focused visual language model designed to generate radiology reports from chest X-rays. Building on previous findings that large language models can acquire multimodal capabilities when aligned with pretrained vision encoders, we demonstrate similar potential with chest X-ray images. The model combines an image encoder (CLIP) with a fine-tuned large language model (LLM) based on the Vicuna-7B architecture. The training process involves a two-stage approach: initial alignment of chest X-ray features with the LLM, followed by fine-tuning for radiology report generation. The study highlights the importance of generating both FINDINGS and IMPRESSIONS sections in radiology reports and evaluates the model's performance using various metrics, achieving notable accuracy in generating high-quality medical reports. The research also addresses the need for domain-specific fine-tuning to capture the intricate details necessary for accurate medical interpretations and reports.},
}
Markdown (Informal)
[Gla-AI4BioMed at RRG24: Visual Instruction-tuned Adaptation for Radiology Report Generation](https://aclanthology.org/2024.bionlp-1.54/) (Zhang et al., BioNLP 2024)
ACL