@inproceedings{garg-ramakrishnan-2020-bae,
title = "{BAE}: {BERT}-based Adversarial Examples for Text Classification",
author = "Garg, Siddhant and
Ramakrishnan, Goutham",
editor = "Webber, Bonnie and
Cohn, Trevor and
He, Yulan and
Liu, Yang",
booktitle = "Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2020.emnlp-main.498/",
doi = "10.18653/v1/2020.emnlp-main.498",
pages = "6174--6181",
abstract = "Modern text classification models are susceptible to adversarial examples, perturbed versions of the original text indiscernible by humans which get misclassified by the model. Recent works in NLP use rule-based synonym replacement strategies to generate adversarial examples. These strategies can lead to out-of-context and unnaturally complex token replacements, which are easily identifiable by humans. We present BAE, a black box attack for generating adversarial examples using contextual perturbations from a BERT masked language model. BAE replaces and inserts tokens in the original text by masking a portion of the text and leveraging the BERT-MLM to generate alternatives for the masked tokens. Through automatic and human evaluations, we show that BAE performs a stronger attack, in addition to generating adversarial examples with improved grammaticality and semantic coherence as compared to prior work."
}
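
The abstract describes BAE's core step: mask a token and use a BERT masked language model to propose in-context replacements or insertions, then query the victim classifier (a black-box attack) to keep perturbations that flip its prediction. Below is a minimal sketch of just the contextual-perturbation step using the Hugging Face `transformers` fill-mask pipeline; the model name and example sentence are illustrative placeholders, not the authors' released code.

    # Sketch: generate in-context candidate replacements with a BERT MLM,
    # as in the replace/insert perturbations described in the abstract.
    from transformers import pipeline

    fill_mask = pipeline("fill-mask", model="bert-base-uncased")

    # Mask one token of the input; BERT-MLM proposes contextual alternatives.
    text = "the movie was [MASK] and I enjoyed every minute"
    for candidate in fill_mask(text, top_k=5):
        # Each candidate carries the proposed token and its MLM score.
        # BAE would substitute each candidate into the text, query the
        # target classifier, and keep perturbations that change its label.
        print(candidate["token_str"], round(candidate["score"], 4))

Because the replacements come from a language model conditioned on the surrounding context rather than a fixed synonym table, they tend to stay grammatical and semantically coherent, which is the advantage over rule-based synonym substitution that the paper evaluates.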