% BlackboxNLP 2025 workshop paper; ACL Anthology ID 2025.blackboxnlp-1.13.
% The url field uses the canonical Anthology address rather than the
% temporary preview/ingest staging host.
@inproceedings{frikha-etal-2025-privacyscalpel,
  author    = {Frikha, Ahmed and Razi, Muhammad Reza Ar and Nakka, Krishna Kanth and Mendes, Ricardo and Jiang, Xue and Zhou, Xuebing},
  title     = {{PrivacyScalpel}: Enhancing {LLM} Privacy via Interpretable Feature Intervention with Sparse Autoencoders},
  editor    = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
  booktitle = {Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  pages     = {226--238},
  isbn      = {979-8-89176-346-3},
  url       = {https://aclanthology.org/2025.blackboxnlp-1.13/},
}