% ClinicalNLP 2023 workshop paper (ACL Anthology ID 2023.clinicalnlp-1.2).
@inproceedings{yanaka-etal-2023-medical,
  author    = {Yanaka, Hitomi and
               Nakamura, Yuta and
               Chida, Yuki and
               Kurosawa, Tomoya},
  title     = {Medical Visual Textual Entailment for Numerical Understanding of Vision-and-Language Models},
  editor    = {Naumann, Tristan and
               Ben Abacha, Asma and
               Bethard, Steven and
               Roberts, Kirk and
               Rumshisky, Anna},
  booktitle = {Proceedings of the 5th Clinical Natural Language Processing Workshop},
  publisher = {Association for Computational Linguistics},
  address   = {Toronto, Canada},
  month     = jul,
  year      = {2023},
  pages     = {8--18},
  doi       = {10.18653/v1/2023.clinicalnlp-1.2},
  url       = {https://aclanthology.org/2023.clinicalnlp-1.2},
  abstract  = {Assessing the capacity of numerical understanding of vision-and-language models over images and texts is crucial for real vision-and-language applications, such as systems for automated medical image analysis. We provide a visual reasoning dataset focusing on numerical understanding in the medical domain. The experiments using our dataset show that current vision-and-language models fail to perform numerical inference in the medical domain. However, the data augmentation with only a small amount of our dataset improves the model performance, while maintaining the performance in the general domain.},
}
Markdown (Informal)
[Medical Visual Textual Entailment for Numerical Understanding of Vision-and-Language Models](https://aclanthology.org/2023.clinicalnlp-1.2) (Yanaka et al., ClinicalNLP 2023)
ACL