@comment{Paper from the LREC-COLING 2024 proceedings (Anthology ID 2024.lrec-main.529). The url field uses the permanent aclanthology.org host rather than a temporary preview build.}
@inproceedings{aksoy-etal-2024-enhancing,
  title     = {Enhancing Image-to-Text Generation in Radiology Reports through Cross-modal Multi-Task Learning},
  author    = {Aksoy, Nurbanu and
               Ravikumar, Nishant and
               Sharoff, Serge},
  editor    = {Calzolari, Nicoletta and
               Kan, Min-Yen and
               Hoste, Veronique and
               Lenci, Alessandro and
               Sakti, Sakriani and
               Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation ({LREC-COLING} 2024)},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  url       = {https://aclanthology.org/2024.lrec-main.529/},
  pages     = {5977--5985},
  abstract  = {Image-to-text generation involves automatically generating descriptive text from images and has applications in medical report generation. However, traditional approaches often exhibit a semantic gap between visual and textual information. In this paper, we propose a multi-task learning framework to leverage both visual and non-imaging data for generating radiology reports. Along with chest X-ray images, 10 additional features comprising numeric, binary, categorical, and text data were incorporated to create a unified representation. The model was trained to generate text, predict the degree of patient severity, and identify medical findings. Multi-task learning, especially with text generation prioritisation, improved performance over single-task baselines across language generation metrics. The framework also mitigated overfitting in auxiliary tasks compared to single-task models. Qualitative analysis showed logically coherent narratives and accurate identification of findings, though some repetition and disjointed phrasing remained. This work demonstrates the benefits of multi-modal, multi-task learning for image-to-text generation applications.},
}
Markdown (Informal)
[Enhancing Image-to-Text Generation in Radiology Reports through Cross-modal Multi-Task Learning](https://aclanthology.org/2024.lrec-main.529/) (Aksoy et al., LREC-COLING 2024)
ACL