@inproceedings{cho-hockenmaier-2025-toward,
    title = "Toward Efficient Sparse Autoencoder-Guided Steering for Improved In-Context Learning in Large Language Models",
    author = "Cho, Ikhyun and
      Hockenmaier, Julia",
    editor = "Christodoulopoulos, Christos and
      Chakraborty, Tanmoy and
      Rose, Carolyn and
      Peng, Violet",
    booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing",
    month = nov,
    year = "2025",
    address = "Suzhou, China",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.emnlp-main.1474/",
    pages = "28949--28961",
    isbn = "979-8-89176-332-6",
    abstract = "Sparse autoencoders (SAEs) have emerged as a powerful analytical tool in mechanistic interpretability for large language models (LLMs), with growing success in applications beyond interpretability. Building on this momentum, we present a novel approach that leverages SAEs to enhance the general in-context learning (ICL) performance of LLMs. Specifically, we introduce Feature Detection through Prompt Variation (FDPV), which leverages the SAE{'}s remarkable ability to capture subtle differences between prompts, enabling efficient feature selection for downstream steering. In addition, we propose a novel steering method tailored to ICL{---}Selective In-Context Steering (SISTER){---}grounded in recent insights from ICL research that LLMs utilize label words as key anchors. Our method yields a 3.5{\%} average performance improvement across diverse text classification tasks and exhibits greater robustness to hyperparameter variations compared to standard steering approaches. Our code is available at https://github.com/ihcho2/SAE-ICL."
}

Markdown (Informal)
[Toward Efficient Sparse Autoencoder-Guided Steering for Improved In-Context Learning in Large Language Models](https://aclanthology.org/2025.emnlp-main.1474/) (Cho & Hockenmaier, EMNLP 2025)
ACL