@inproceedings{gottesman-etal-2025-eliciting,
  title     = {Eliciting Textual Descriptions from Representations of Continuous Prompts},
  author    = {Gottesman, Daniela and
               Geva, Mor and
               Ramati, Dana},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2025},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-acl.849/},
  pages     = {16545--16562},
  isbn      = {979-8-89176-256-5},
  abstract  = {Continuous prompts, or ``soft prompts'', are a widely-adopted parameter-efficient tuning strategy for large language models, but are often less favorable due to their opaque nature. Prior attempts to interpret continuous prompts relied on projecting individual prompt tokens onto the vocabulary space. However, this approach is problematic as performant prompts can yield arbitrary or contradictory text, and it individually interprets each prompt token. In this work, we propose a new approach to interpret continuous prompts that elicits textual descriptions from their representations during model inference. Using a Patchscopes variant (Ghandeharioun et al., 2024) called InSPEcT over various tasks, we show our method often yields accurate task descriptions which become more faithful as task performance increases. Moreover, an elaborated version of InSPEcT reveals biased features in continuous prompts, whose presence correlates with biased model predictions. Providing an effective interpretability solution, InSPEcT can be leveraged to debug unwanted properties in continuous prompts and inform developers on ways to mitigate them.},
}
Markdown (Informal)
[Eliciting Textual Descriptions from Representations of Continuous Prompts](https://aclanthology.org/2025.findings-acl.849/) (Gottesman et al., Findings 2025)
ACL