@inproceedings{yang-etal-2025-audio,
    title     = {Audio Is the {Achilles}' Heel: Red Teaming Audio Large Multimodal Models},
    author    = {Yang, Hao and
                 Qu, Lizhen and
                 Shareghi, Ehsan and
                 Haffari, Gholamreza},
    editor    = {Chiruzzo, Luis and
                 Ritter, Alan and
                 Wang, Lu},
    booktitle = {Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)},
    month     = apr,
    year      = {2025},
    address   = {Albuquerque, New Mexico},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.naacl-long.470/},
    pages     = {9292--9306},
    isbn      = {979-8-89176-189-6},
    abstract  = {Large Multimodal Models (LMMs) have demonstrated the ability to interact with humans under real-world conditions by combining Large Language Models (LLMs) and modality encoders to align multimodal information (visual and auditory) with text. However, such models raise new safety challenges of whether models that are safety-aligned on text also exhibit consistent safeguards for multimodal inputs. Despite recent safety-alignment research on vision LMMs, the safety of audio LMMs remains under-explored. In this work, we comprehensively red team the safety of five advanced audio LMMs under three settings: (i) harmful questions in both audio and text formats, (ii) harmful questions in text format accompanied by distracting non-speech audio, and (iii) speech-specific jailbreaks. Our results under these settings demonstrate that open-source audio LMMs suffer an average attack success rate of 69.14{\%} on harmful audio questions, and exhibit safety vulnerabilities when distracted with non-speech audio noise. Our speech-specific jailbreaks on Gemini-1.5-Pro achieve an attack success rate of 70.67{\%} on the harmful query benchmark. We provide insights on what could cause these reported safety-misalignments. Warning: this paper contains offensive examples.}
}
Markdown (Informal)
[Audio Is the Achilles’ Heel: Red Teaming Audio Large Multimodal Models](https://aclanthology.org/2025.naacl-long.470/) (Yang et al., NAACL 2025)
ACL
- Hao Yang, Lizhen Qu, Ehsan Shareghi, and Gholamreza Haffari. 2025. Audio Is the Achilles’ Heel: Red Teaming Audio Large Multimodal Models. In Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), pages 9292–9306, Albuquerque, New Mexico. Association for Computational Linguistics.