@inproceedings{parikh-etal-2022-canary,
title = "Canary Extraction in Natural Language Understanding Models",
author = "Parikh, Rahil and
Dupuy, Christophe and
Gupta, Rahul",
editor = "Muresan, Smaranda and
Nakov, Preslav and
Villavicencio, Aline",
booktitle = "Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2022.acl-short.61/",
doi = "10.18653/v1/2022.acl-short.61",
pages = "552--560",
abstract = "Natural Language Understanding (NLU) models can be trained on sensitive information such as phone numbers, zip-codes etc. Recent literature has focused on Model Inversion Attacks (ModIvA) that can extract training data from model parameters. In this work, we present a version of such an attack by extracting canaries inserted in NLU training data. In the attack, an adversary with open-box access to the model reconstructs the canaries contained in the model{'}s training set. We evaluate our approach by performing text completion on canaries and demonstrate that by using the prefix (non-sensitive) tokens of the canary, we can generate the full canary. As an example, our attack is able to reconstruct a four digit code in the training dataset of the NLU model with a probability of 0.5 in its best configuration. As countermeasures, we identify several defense mechanisms that, when combined, effectively eliminate the risk of ModIvA in our experiments."
}
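
For context, the evaluation the abstract describes (complete a canary from its known, non-sensitive prefix and check whether the secret suffix comes back) can be sketched as follows. This is a minimal illustration only: the toy bigram model, the canary template, and all identifiers are hypothetical stand-ins, not the paper's ModIvA attack, which reconstructs canaries from the parameters of a trained NLU model.

```python
# Minimal sketch of canary insertion plus prefix-completion checking,
# loosely following the evaluation described in the abstract. The bigram
# "model" below is a hypothetical stand-in for a trained NLU model.
import random
from collections import Counter, defaultdict

# 1. Build a canary: a non-sensitive prefix followed by a secret 4-digit code.
secret = "".join(random.choice("0123456789") for _ in range(4))
canary = f"my verification code is {secret}"  # hypothetical canary template

# 2. Insert the canary into an otherwise benign training corpus.
corpus = [
    "play some jazz music",
    "set an alarm for seven",
    canary,
]

# 3. "Train" a trivial token-level bigram model on the corpus.
bigrams = defaultdict(Counter)
for line in corpus:
    tokens = line.split()
    for left, right in zip(tokens, tokens[1:]):
        bigrams[left][right] += 1

# 4. Attack: greedily complete the known (non-sensitive) prefix and check
#    whether the secret suffix is reconstructed.
completion = "my verification code is".split()
next_counts = bigrams[completion[-1]]
if next_counts:
    completion.append(next_counts.most_common(1)[0][0])

recovered = completion[-1]
print(f"secret={secret} recovered={recovered} success={recovered == secret}")
```

Because the toy model trivially memorizes its tiny corpus, the secret is recovered every time; the paper's point is measuring how often this succeeds against a real NLU model (0.5 for a four-digit code in its best configuration).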