% Q-FAKER, Findings of NAACL 2025 (ACL Anthology ID: 2025.findings-naacl.463).
% The url field uses the canonical Anthology address rather than a temporary
% branch-preview mirror; the abstract is reproduced verbatim from the source.
@inproceedings{na-etal-2025-q,
    title     = {{Q-FAKER}: Query-free Hard Black-box Attack via Controlled Generation},
    author    = {Na, CheolWon and
                 Choi, YunSeok and
                 Lee, Jee-Hyong},
    editor    = {Chiruzzo, Luis and
                 Ritter, Alan and
                 Wang, Lu},
    booktitle = {Findings of the Association for Computational Linguistics: NAACL 2025},
    month     = apr,
    year      = {2025},
    address   = {Albuquerque, New Mexico},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.findings-naacl.463/},
    pages     = {8274--8289},
    isbn      = {979-8-89176-195-7},
    abstract  = {Many adversarial attack approaches are proposed to verify the vulnerability of language models. However, they require numerous queries and the information on the target model. Even black-box attack methods also require the target model{'}s output information. They are not applicable in real-world scenarios, as in hard black-box settings where the target model is closed and inaccessible. Even the recently proposed hard black-box attacks still require many queries and demand extremely high costs for training adversarial generators. To address these challenges, we propose Q-faker (Query-free Hard Black-box Attacker), a novel and efficient method that generates adversarial examples without accessing the target model. To avoid accessing the target model, we use a surrogate model instead. The surrogate model generates adversarial sentences for a target-agnostic attack. During this process, we leverage controlled generation techniques. We evaluate our proposed method on eight datasets. Experimental results demonstrate our method{'}s effectiveness including high transferability and the high quality of the generated adversarial examples, and prove its practical in hard black-box settings.},
}
Markdown (Informal)
[Q-FAKER: Query-free Hard Black-box Attack via Controlled Generation](https://aclanthology.org/2025.findings-naacl.463/) (Na et al., Findings 2025)
ACL