% BlackboxNLP 2025 workshop paper (ACL Anthology ID 2025.blackboxnlp-1.13).
% The url uses the canonical aclanthology.org host rather than a temporary preview/ingest branch.
@inproceedings{frikha-etal-2025-privacyscalpel,
    title = "{PrivacyScalpel}: Enhancing {LLM} Privacy via Interpretable Feature Intervention with Sparse Autoencoders",
    author = "Frikha, Ahmed  and
      Razi, Muhammad Reza Ar  and
      Nakka, Krishna Kanth  and
      Mendes, Ricardo  and
      Jiang, Xue  and
      Zhou, Xuebing",
    editor = "Belinkov, Yonatan  and
      Mueller, Aaron  and
      Kim, Najoung  and
      Mohebbi, Hosein  and
      Chen, Hanjie  and
      Arad, Dana  and
      Sarti, Gabriele",
    booktitle = "Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
    month = nov,
    year = "2025",
    address = "Suzhou, China",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.blackboxnlp-1.13/",
    pages = "226--238",
    ISBN = "979-8-89176-346-3"
}