% Findings of ACL 2026 paper evaluating membership inference and image caption
% retrieval attacks against multimodal RAG (mRAG) pipelines.
% The {A} after "Data?" is braced so that sentence-casing styles keep the
% capital letter that starts the subtitle (only the first character of a title
% and the character after a colon are preserved automatically).
% NOTE(review): url points at the Anthology preview/ingest host -- confirm the
% canonical URL before this entry is cited in a camera-ready bibliography.
@inproceedings{al-lawati-wang-2026-multimodal,
    title     = "Do Multimodal {RAG} Systems Leak Data? {A} Comprehensive Evaluation of Membership Inference and Image Caption Retrieval Attacks",
    author    = "Al-Lawati, Ali and
                 Wang, Suhang",
    editor    = "Liakata, Maria and
                 Moreira, Viviane P. and
                 Zhang, Jiajun and
                 Jurgens, David",
    booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026",
    month     = jul,
    year      = "2026",
    address   = "San Diego, California, United States",
    publisher = "Association for Computational Linguistics",
    url       = "https://preview.aclanthology.org/ingest-acl/2026.findings-acl.444/",
    pages     = "9139--9154",
    ISBN      = "979-8-89176-395-1",
    abstract  = "The growing adoption of multimodal Retrieval-Augmented Generation (mRAG) pipelines for vision-centric tasks (e.g., visual QA) introduces important privacy challenges. In particular, while mRAG provides a practical capability to connect private datasets and improve model performance, it risks the leakage of private information from these datasets. In this paper, we perform an empirical study to analyze the privacy risks inherent in the mRAG pipeline observed through standard model prompting. Specifically, we implement a case study that attempts to determine whether a visual asset (e.g., image) is included in the mRAG, and, if present, to leak the metadata (e.g., caption) related to it. Our findings highlight the need for privacy-preserving mechanisms and motivate future research on mRAG privacy. Our code is published online: \url{https://github.com/aliwister/mrag-attack-eval}."
}
Markdown (Informal)
[Do Multimodal RAG Systems Leak Data? A Comprehensive Evaluation of Membership Inference and Image Caption Retrieval Attacks](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.444/) (Al-Lawati & Wang, Findings 2026)
ACL