@inproceedings{yin-etal-2025-kia,
title = "{KIA}: Knowledge-Guided Implicit Vision-Language Alignment for Chest {X}-Ray Report Generation",
author = "Yin, Heng and
Zhou, Shanlin and
Wang, Pandong and
Wu, Zirui and
Hao, Yongtao",
editor = "Rambow, Owen and
Wanner, Leo and
Apidianaki, Marianna and
Al-Khalifa, Hend and
Di Eugenio, Barbara and
Schockaert, Steven",
booktitle = "Proceedings of the 31st International Conference on Computational Linguistics",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2025.coling-main.276/",
pages = "4096--4108",
abstract = "Report generation (RG) faces challenges in understanding complex medical images and establishing cross-modal semantic alignment in radiology image-report pairs. Previous methods often overlook fine-grained cross-modal interaction, leading to insufficient understanding of detailed information. Recently, various large multimodal models have been proposed for image-text tasks. However, such models still underperform on rare domain tasks like understanding complex medical images. To address these limitations, we develop a new framework of Knowledge-guided Implicit vision-language Alignment for radiology report generation, named KIA. To better understand medical reports and images and build alignment between them, multi-task implicit alignment is creatively introduced, forming comprehensive understanding of medical images and reports. Additionally, to further meet medical refinement requirements, we design novel masking strategies guided by medical knowledge to enhance pathological observation and anatomical landm"
}