@comment{
  Workshop paper record; the Markdown citation accompanying this entry
  labels the venue PrivateNLP 2026.
  NOTE(review): the url field points at a preview.aclanthology.org ingest
  branch, which looks like a staging location. Confirm and switch to the
  canonical Anthology URL (and add a DOI, if one is assigned) once the
  volume is published.
}
@inproceedings{zaghouani-2026-linguistic,
  author    = {Zaghouani, Wajdi},
  title     = {Linguistic Identity Leakage: When Language Reveals Identity in Anonymized Text},
  editor    = {Habernal, Ivan and
               Ghanavati, Sepideh and
               Haghighi, Sara and
               Ramesh, Krithika and
               Igamberdiev, Timour and
               Wilson, Shomir},
  booktitle = {Proceedings of the Seventh Workshop on Privacy in Natural Language Processing},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.privatenlp-main.8/},
  pages     = {107--117},
  isbn      = {979-8-89176-397-5},
  abstract  = {Privacy-preserving natural language processing (NLP) typically focuses on removing explicit identifiers such as names, addresses, and phone numbers. We argue that this approach overlooks a key risk: natural language itself encodes signals about a speaker{'}s geographic origin, social background, and community membership that persist after anonymization. We introduce Linguistic Identity Leakage (LIL), defined as the inference of personal or demographic attributes from linguistic features in text where explicit identifiers have been removed. We further introduce Linguistic Personally Identifiable Information (L-PII) to denote the linguistic features that enable such inference. Drawing on sociolinguistics, stylometry, and NLP privacy research, we propose a taxonomy of linguistic identity signals across five categories and examine implications for dataset release, language model training, and privacy auditing. Using examples from Arabic dialectal variation and other multilingual contexts, we present the Identity Inference Risk (IIR) framework for assessing residual privacy risk in NLP systems and discuss how contemporary LLMs amplify these risks. Our goal is to encourage broader recognition of the gap between conventional anonymization practices and the linguistic reality of natural language data.},
}

Markdown (Informal)
[Linguistic Identity Leakage: When Language Reveals Identity in Anonymized Text](https://preview.aclanthology.org/ingest-acl-workshops/2026.privatenlp-main.8/) (Zaghouani, PrivateNLP 2026)
ACL