@inproceedings{le-etal-2022-perturbations,
title = "Perturbations in the Wild: Leveraging Human-Written Text Perturbations for Realistic Adversarial Attack and Defense",
author = "Le, Thai and
Lee, Jooyoung and
Yen, Kevin and
Hu, Yifan and
Lee, Dongwon",
editor = "Muresan, Smaranda and
Nakov, Preslav and
Villavicencio, Aline",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2022",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2022.findings-acl.232/",
doi = "10.18653/v1/2022.findings-acl.232",
pages = "2953--2965",
abstract = "We proposes a novel algorithm, ANTHRO, that inductively extracts over 600K human-written text perturbations in the wild and leverages them for realistic adversarial attack. Unlike existing character-based attacks which often deductively hypothesize a set of manipulation strategies, our work is grounded on actual observations from real-world texts. We find that adversarial texts generated by ANTHRO achieve the best trade-off between (1) attack success rate, (2) semantic preservation of the original text, and (3) stealthiness{--}i.e. indistinguishable from human writings hence harder to be flagged as suspicious. Specifically, our attacks accomplished around 83{\%} and 91{\%} attack success rates on BERT and RoBERTa, respectively. Moreover, it outperformed the TextBugger baseline with an increase of 50{\%} and 40{\%} in terms of semantic preservation and stealthiness when evaluated by both layperson and professional human workers. ANTHRO can further enhance a BERT classifier`s performance in understanding different variations of human-written toxic texts via adversarial training when compared to the Perspective API."
}
Markdown (Informal)
[Perturbations in the Wild: Leveraging Human-Written Text Perturbations for Realistic Adversarial Attack and Defense](https://preview.aclanthology.org/jlcl-multiple-ingestion/2022.findings-acl.232/) (Le et al., Findings 2022)
ACL
Thai Le, Jooyoung Lee, Kevin Yen, Yifan Hu, and Dongwon Lee. 2022. Perturbations in the Wild: Leveraging Human-Written Text Perturbations for Realistic Adversarial Attack and Defense. In Findings of the Association for Computational Linguistics: ACL 2022, pages 2953–2965, Dublin, Ireland. Association for Computational Linguistics.