% Conference paper in the ACL 2022 main proceedings (Volume 1: Long Papers).
% The url field uses the permanent ACL Anthology address, not a preview build.
@inproceedings{meehan-etal-2022-sentence,
  title     = {Sentence-level Privacy for Document Embeddings},
  author    = {Meehan, Casey and
               Mrini, Khalil and
               Chaudhuri, Kamalika},
  editor    = {Muresan, Smaranda and
               Nakov, Preslav and
               Villavicencio, Aline},
  booktitle = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = may,
  year      = {2022},
  address   = {Dublin, Ireland},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.acl-long.238/},
  doi       = {10.18653/v1/2022.acl-long.238},
  pages     = {3367--3380},
  abstract  = {User language data can contain highly sensitive personal content. As such, it is imperative to offer users a strong and interpretable privacy guarantee when learning from their data. In this work we propose SentDP, pure local differential privacy at the sentence level for a single user document. We propose a novel technique, DeepCandidate, that combines concepts from robust statistics and language modeling to produce high (768) dimensional, general $\epsilon$-SentDP document embeddings. This guarantees that any single sentence in a document can be substituted with any other sentence while keeping the embedding $\epsilon$-indistinguishable. Our experiments indicate that these private document embeddings are useful for downstream tasks like sentiment analysis and topic classification and even outperform baseline methods with weaker guarantees like word-level Metric DP.},
}
Markdown (Informal)
[Sentence-level Privacy for Document Embeddings](https://aclanthology.org/2022.acl-long.238/) (Meehan et al., ACL 2022)
ACL
- Casey Meehan, Khalil Mrini, and Kamalika Chaudhuri. 2022. Sentence-level Privacy for Document Embeddings. In Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 3367–3380, Dublin, Ireland. Association for Computational Linguistics.