@inproceedings{chan-etal-2020-poison,
title = "Poison Attacks against Text Datasets with Conditional Adversarially Regularized Autoencoder",
author = "Chan, Alvin and
Tay, Yi and
Ong, Yew-Soon and
Zhang, Aston",
editor = "Cohn, Trevor and
He, Yulan and
Liu, Yang",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2020",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2020.findings-emnlp.373/",
doi = "10.18653/v1/2020.findings-emnlp.373",
pages = "4175--4189",
abstract = "This paper demonstrates a fatal vulnerability in natural language inference (NLI) and text classification systems. More concretely, we present a `backdoor poisoning' attack on NLP models. Our poisoning attack utilizes conditional adversarially regularized autoencoder (CARA) to generate poisoned training samples by poison injection in latent space. Just by adding 1{\%} poisoned data, our experiments show that a victim BERT finetuned classifier{'}s predictions can be steered to the poison target class with success rates of $>80\%$ when the input hypothesis is injected with the poison signature, demonstrating that NLI and text classification systems face a huge security risk."
}