% Anthology-style record for a MeLLM 2026 workshop paper (pages 284--293).
% NOTE(review): `url` targets a preview.aclanthology.org ingest path, presumably
% a staging host -- confirm the canonical URL before relying on it.
% NOTE(review): `address` appears to hold the event location rather than the
% publisher's city -- confirm against the target bibliography style.
@inproceedings{li-etal-2026-privacy,
  title     = {Where Privacy Risk Lives in {English}-Source Multilingual {RAG}: A Stage-Decomposed Audit Across Five Query Languages},
  author    = {Li, Yanhang and
               Fan, Zhichao and
               Zhuang, Zexin},
  editor    = {Huang, Kaiyu and
               Mo, Fengran and
               Chen, Pinzhen and
               Jiang, Meng},
  booktitle = {Proceedings of the 1st Workshop on Multilinguality in the Era of Large Language Models ({MeLLM} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.mellm-1.28/},
  pages     = {284--293},
  isbn      = {979-8-89176-430-9},
  abstract  = {A common assumption holds that switching to a non-English language makes a multilingual RAG system easier to attack for personal information. On an English-source synthetic-PII corpus with five query languages and a two-stage defence (LLM input judge + regex output filter), the output-stage point estimates do not support that assumption: English has the highest observed unstructured-PII leak rate, and only English-vs-Swahili separates cleanly under our document-level bootstrap intervals. Once the input judge is added, residual leaks remain on Arabic and Swahili in this Qwen-mediated pipeline, and back-translating the query does not close the gap. Translator, judge, and generator share one model family, so we treat this as pipeline-conditional rather than a causal language ranking. As an oracle diagnostic on a separate n=17 multilingual-prompted-judge residual corner, attaching the gold corpus document to the input judge blocks 15/17 residual cells {---} a follow-up direction, not a deployed mitigation, since all BLOCK/ALLOW rates are on adversarial queries only and we measure no benign-query FPR or utility. The anonymous supplement contains code, corpora, queries, and per-trial JSONLs.},
}
Markdown (Informal)
[Where Privacy Risk Lives in English-Source Multilingual RAG: A Stage-Decomposed Audit Across Five Query Languages](https://preview.aclanthology.org/ingest-acl-workshops/2026.mellm-1.28/) (Li et al., MeLLM 2026)
ACL