% Findings of EMNLP 2025 paper on applying sparse autoencoders to in-context learning.
% The url field uses the canonical ACL Anthology address, not a branch preview build.
@inproceedings{cho-etal-2025-versatility,
  title     = {On the Versatility of Sparse Autoencoders for In-Context Learning},
  author    = {Cho, Ikhyun and
               Kwon, Gaeul and
               Hockenmaier, Julia},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2025},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-emnlp.1063/},
  doi       = {10.18653/v1/2025.findings-emnlp.1063},
  pages     = {19531--19538},
  isbn      = {979-8-89176-335-7},
  abstract  = {Sparse autoencoders (SAEs) are emerging as a key analytical tool in the field of mechanistic interpretability for large language models (LLMs). While SAEs have primarily been used for interpretability, we shift focus and explore an understudied question: ``Can SAEs be applied to practical tasks beyond interpretability?'' Given that SAEs are trained on billions of tokens for sparse reconstruction, we believe they can serve as effective extractors, offering a wide range of useful knowledge that can benefit practical applications. Building on this motivation, we demonstrate that SAEs can be effectively applied to in-context learning (ICL). In particular, we highlight the utility of the SAE-reconstruction loss by showing that it provides a valuable signal in ICL{---}exhibiting a strong correlation with LLM performance and offering a powerful unsupervised approach for prompt selection. These findings underscore the versatility of SAEs and reveal their potential for real-world applications beyond interpretability. Our code is available at https://github.com/ihcho2/SAE-GPS.},
}
Markdown (Informal)
[On the Versatility of Sparse Autoencoders for In-Context Learning](https://aclanthology.org/2025.findings-emnlp.1063/) (Cho et al., Findings 2025)
ACL