% Conference paper in the IJCNLP-AACL 2025 proceedings.
% NOTE(review): the url field points at a preview.aclanthology.org ingest path,
% which looks like a staging address; confirm it and replace it with the
% permanent ACL Anthology URL (or add a doi) once that is known.
@inproceedings{enhos-etal-2025-minimizing,
  title     = {Minimizing Queries, Maximizing Impact: Adaptive Score-Based Attack and Defense for Sentiment Analysis},
  author    = {Enhos, Yigit Efe and
               Wein, Shira and
               Alfeld, Scott},
  editor    = {Inui, Kentaro and
               Sakti, Sakriani and
               Wang, Haofen and
               Wong, Derek F. and
               Bhattacharyya, Pushpak and
               Banerjee, Biplab and
               Ekbal, Asif and
               Chakraborty, Tanmoy and
               Singh, Dhirendra Pratap},
  booktitle = {Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics},
  month     = dec,
  year      = {2025},
  address   = {Mumbai, India},
  publisher = {The Asian Federation of Natural Language Processing and The Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-ijcnlp-aacl/2025.ijcnlp-long.15/},
  pages     = {246--258},
  isbn      = {979-8-89176-298-5},
  abstract  = {While state-of-the-art large language models find high rates of success on text classification tasks such as sentiment analysis, they still exhibit vulnerabilities to adversarial examples: meticulously crafted perturbations of input data that guide models into making false predictions. These adversarial attacks are of particular concern when the systems in question are user-facing. While many attacks are able to reduce the accuracy of Transformer-based classifiers by a substantial margin, they often require a large amount of computational time and a large number of queries made to the attacked model, which makes the attacks susceptible to detection. In this work, we resolve the limitations of high query counts and necessary computational time by proposing a query-efficient word-level attack that is fast during deployment and does not compromise the attack success rate of state-of-the-art methods. Our attack constructs a dictionary of adversarial word substitutions based on prior data and leverages these substitutions to flip the sentiment classification of the text. Our attack method achieves an average of 27.49 queries{---}over 30{\%} fewer than the closest competitor{---}while maintaining a 99.70{\%} attack success rate. We also develop an effective defense strategy inspired by our attack approach.},
}
Markdown (Informal)
[Minimizing Queries, Maximizing Impact: Adaptive Score-Based Attack and Defense for Sentiment Analysis](https://preview.aclanthology.org/ingest-ijcnlp-aacl/2025.ijcnlp-long.15/) (Enhos et al., IJCNLP-AACL 2025)
ACL