@inproceedings{yang-li-2024-best,
    title     = {The Best Defense is Attack: Repairing Semantics in Textual Adversarial Examples},
    author    = {Yang, Heng and
                 Li, Ke},
    editor    = {Al-Onaizan, Yaser and
                 Bansal, Mohit and
                 Chen, Yun-Nung},
    booktitle = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing},
    month     = nov,
    year      = {2024},
    address   = {Miami, Florida, USA},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2024.emnlp-main.481/},
    doi       = {10.18653/v1/2024.emnlp-main.481},
    pages     = {8439--8457},
    abstract  = {Recent studies have revealed the vulnerability of pre-trained language models to adversarial attacks. Adversarial defense techniques have been proposed to reconstruct adversarial examples within feature or text spaces. However, these methods struggle to effectively repair the semantics in adversarial examples, resulting in unsatisfactory defense performance. To repair the semantics in adversarial examples, we introduce a novel approach named Reactive Perturbation Defocusing (Rapid), which employs an adversarial detector to identify the fake labels of adversarial examples and leverages adversarial attackers to repair the semantics in adversarial examples. Our extensive experimental results, conducted on four public datasets, demonstrate the consistent effectiveness of Rapid in various adversarial attack scenarios. For easy evaluation, we provide a click-to-run demo of Rapid at https://tinyurl.com/22ercuf8.},
}
Markdown (Informal)
[The Best Defense is Attack: Repairing Semantics in Textual Adversarial Examples](https://aclanthology.org/2024.emnlp-main.481/) (Yang & Li, EMNLP 2024)
ACL