% NAACL 2025 long paper (Wang, Zhao, Larson): typographic attacks studied in a multi-image setting.
% The url field uses the canonical ACL Anthology landing page, not a preview-branch mirror.
@inproceedings{wang-etal-2025-typographic,
  title     = {Typographic Attacks in a Multi-Image Setting},
  author    = {Wang, Xiaomeng and
               Zhao, Zhengyu and
               Larson, Martha},
  editor    = {Chiruzzo, Luis and
               Ritter, Alan and
               Wang, Lu},
  booktitle = {Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)},
  month     = apr,
  year      = {2025},
  address   = {Albuquerque, New Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.naacl-long.626/},
  pages     = {12594--12604},
  isbn      = {979-8-89176-189-6},
  abstract  = {Large Vision-Language Models (LVLMs) are susceptible to typographic attacks, which are misclassifications caused by an attack text that is added to an image. In this paper, we introduce a multi-image setting for studying typographic attacks, broadening the current emphasis of the literature on attacking individual images. Specifically, our focus is on attacking image sets without repeating the attack query. Such non-repeating attacks are stealthier, as they are more likely to evade a gatekeeper than attacks that repeat the same attack text. We introduce two attack strategies for the multi-image setting, leveraging the difficulty of the target image, the strength of the attack text, and text-image similarity. Our text-image similarity approach improves attack success rates by 21{\%} over random, non-specific methods on the CLIP model using ImageNet while maintaining stealth in a multi-image scenario. An additional experiment demonstrates transferability, i.e., text-image similarity calculated using CLIP transfers when attacking InstructBLIP.},
}
Markdown (Informal)
[Typographic Attacks in a Multi-Image Setting](https://aclanthology.org/2025.naacl-long.626/) (Wang et al., NAACL 2025)
ACL
- Xiaomeng Wang, Zhengyu Zhao, and Martha Larson. 2025. Typographic Attacks in a Multi-Image Setting. In Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), pages 12594–12604, Albuquerque, New Mexico. Association for Computational Linguistics.