@inproceedings{derner-batistic-2025-beyond,
    title     = {Beyond Words: Multilingual and Multimodal Red Teaming of {MLLM}s},
    author    = {Derner, Erik and
      Batisti{\v{c}}, Kristina},
    editor    = {Novikova, Jekaterina},
    booktitle = {Proceedings of the First Workshop on {LLM} Security ({LLMSEC})},
    month     = aug,
    year      = {2025},
    address   = {Vienna, Austria},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.llmsec-1.15/},
    pages     = {198--206},
    isbn      = {979-8-89176-279-4},
    abstract  = {Multimodal large language models (MLLMs) are increasingly deployed in real-world applications, yet their safety remains underexplored, particularly in multilingual and visual contexts. In this work, we present a systematic red teaming framework to evaluate MLLM safeguards using adversarial prompts translated into seven languages and delivered via four input modalities: plain text, jailbreak prompt + text, text rendered as an image, and jailbreak prompt + text rendered as an image. We find that rendering prompts as images increases attack success rates and reduces refusal rates, with the effect most pronounced in lower-resource languages such as Slovenian, Czech, and Valencian. Our results suggest that vision-based multilingual attacks expose a persistent gap in current alignment strategies, highlighting the need for robust multilingual and multimodal MLLM safety evaluation and mitigation of these risks. We make our code and data available.}
}
Markdown (Informal)
[Beyond Words: Multilingual and Multimodal Red Teaming of MLLMs](https://aclanthology.org/2025.llmsec-1.15/) (Derner & Batistič, LLMSEC 2025)
ACL