@inproceedings{yang-etal-2025-robust,
  title     = {Robust Utility-Preserving Text Anonymization Based on Large Language Models},
  author    = {Yang, Tianyu and
               Zhu, Xiaodan and
               Gurevych, Iryna},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.acl-long.1404/},
  pages     = {28922--28941},
  isbn      = {979-8-89176-251-0},
  abstract  = {Anonymizing text that contains sensitive information is crucial for a wide range of applications. Existing techniques face the emerging challenges of the re-identification ability of large language models (LLMs), which have shown advanced capability in memorizing detailed information and reasoning over dispersed pieces of patterns to draw conclusions. When defending against LLM-based re-identification, anonymization could jeopardize the utility of the resulting anonymized data in downstream tasks. In general, the interaction between anonymization and data utility requires a deeper understanding within the context of LLMs. In this paper, we propose a framework composed of three key LLM-based components: \textit{a privacy evaluator}, \textit{a utility evaluator} and \textit{an optimization component}, which work collaboratively to perform anonymization. Extensive experiments demonstrate that the proposed model outperforms existing baselines, showing robustness in reducing the risk of re-identification while preserving greater data utility in downstream tasks. We provide detailed studies on these core modules. To consider large-scale and real-time applications, we investigate the distillation of the anonymization capabilities into lightweight models. All of our code and datasets will be made publicly available at \texttt{[Github URL]}.},
}
Markdown (Informal)
[Robust Utility-Preserving Text Anonymization Based on Large Language Models](https://aclanthology.org/2025.acl-long.1404/) (Yang et al., ACL 2025)
ACL