@inproceedings{bhaila-etal-2025-soft,
  title     = {Soft Prompting for Unlearning in Large Language Models},
  author    = {Bhaila, Karuna and
               Van, Minh-Hao and
               Wu, Xintao},
  editor    = {Chiruzzo, Luis and
               Ritter, Alan and
               Wang, Lu},
  booktitle = {Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)},
  month     = apr,
  year      = {2025},
  address   = {Albuquerque, New Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.naacl-long.204/},
  pages     = {4046--4056},
  isbn      = {979-8-89176-189-6},
  abstract  = {The widespread popularity of Large Language Models (LLMs), partly due to their emerging in-context learning ability, has highlighted the importance of ethical and safety considerations for deployment. Motivated by corresponding data protection guidelines, we investigate machine unlearning for LLMs. In contrast to the growing literature on fine-tuning methods to achieve unlearning, we focus on a comparatively lightweight alternative called soft prompting to realize unlearning in LLMs. With losses designed to enforce forgetting as well as utility preservation, our framework Soft Prompting for Unlearning (SPUL) learns prompt tokens that are prepended to a query to induce unlearning of specific training examples at inference time without updating LLM parameters. We conduct a rigorous evaluation of the proposed method, and results indicate that SPUL can significantly improve the trade-off between utility and forgetting for text classification and question-answering. We further validate our method with LLMs of varying parameter sizes to highlight its flexibility and provide detailed insights into the choice of hyperparameters and the influence of the size of unlearning data.},
}
@comment{
  Alternate citation formats copied from the ACL Anthology page (kept here for reference; not BibTeX data):

  Markdown (Informal)
  [Soft Prompting for Unlearning in Large Language Models](https://aclanthology.org/2025.naacl-long.204/) (Bhaila et al., NAACL 2025)

  ACL
  Karuna Bhaila, Minh-Hao Van, and Xintao Wu. 2025. Soft Prompting for Unlearning in Large Language Models. In Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), pages 4046–4056, Albuquerque, New Mexico. Association for Computational Linguistics.
}