@inproceedings{hu-etal-2025-vlsbench,
    title = {{VLSBench}: Unveiling Visual Leakage in Multimodal Safety},
    author = {Hu, Xuhao and
      Liu, Dongrui and
      Li, Hao and
      Huang, Xuanjing and
      Shao, Jing},
    editor = {Che, Wanxiang and
      Nabende, Joyce and
      Shutova, Ekaterina and
      Pilehvar, Mohammad Taher},
    booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
    month = jul,
    year = {2025},
    address = {Vienna, Austria},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.acl-long.405/},
    pages = {8285--8316},
    isbn = {979-8-89176-251-0},
    abstract = {Safety concerns of Multimodal large language models (MLLMs) have gradually become an important problem in various applications. Surprisingly, previous works indicate a counterintuitive phenomenon that using textual unlearning to align MLLMs achieves comparable safety performances with MLLMs aligned with image-text pairs. To explain such a phenomenon, we discover a $\textit{\textbf{V}isual \textbf{S}afety \textbf{I}nformation \textbf{L}eakage} (\textbf{VSIL})$ problem in existing multimodal safety benchmarks, $\textit{i.e.}$, the potentially risky content in the image has been revealed in the textual query. Thus, MLLMs can easily refuse these sensitive image-text pairs according to textual queries only, leading to \textbf{unreliable cross-modality safety evaluation of MLLMs}. We also conduct a further comparison experiment between textual alignment and multimodal alignment to highlight this drawback. To this end, we construct $\textit{\textbf{V}isual \textbf{L}eakless \textbf{S}afety \textbf{B}ench} (\textbf{VLSBench})$ with 2.2k image-text pairs through an automated data pipeline. Experimental results indicate that VLSBench poses a significant challenge to both open-source and close-source MLLMs, $\textit{i.e.}$, LLaVA, Qwen2-VL and GPT-4o. Besides, we empirically compare textual and multimodal alignment methods on VLSBench and find that textual alignment is effective enough for multimodal safety scenarios with VSIL, while multimodal alignment is preferable for safety scenarios without VSIL.},
}
Markdown (Informal)
[VLSBench: Unveiling Visual Leakage in Multimodal Safety](https://aclanthology.org/2025.acl-long.405/) (Hu et al., ACL 2025)
ACL
- Xuhao Hu, Dongrui Liu, Hao Li, Xuanjing Huang, and Jing Shao. 2025. VLSBench: Unveiling Visual Leakage in Multimodal Safety. In Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 8285–8316, Vienna, Austria. Association for Computational Linguistics.