% Findings of ACL 2026 paper introducing AdaptiveK sparse autoencoders.
% NOTE(review): `url` points at the Anthology preview (ingest) host; confirm the
% canonical URL once the volume is published. `address` carries the value from
% the upstream export (looks like the conference location) -- verify if a
% publisher city is required by the target style.
@inproceedings{yao-etal-2026-adaptivek,
  title     = {{AdaptiveK}: Complexity-Driven Sparse Autoencoders for Interpretable Language Model Representations},
  author    = {Yao, Yifei and
               Zhang, Hanrong and
               Du, Mengnan},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1187/},
  pages     = {23702--23728},
  isbn      = {979-8-89176-395-1},
  abstract  = {Understanding the internal representations of large language models (LLMs) remains a central challenge for interpretability research. Sparse autoencoders (SAEs) offer a promising solution by decomposing activations into interpretable features, but existing approaches rely on fixed sparsity constraints that fail to account for input complexity. We propose AdaptiveK SAE (Adaptive Top K Sparse Autoencoders), a novel framework that dynamically adjusts sparsity levels based on the semantic complexity of each input. Leveraging linear probes, we demonstrate that context complexity is linearly encoded in LLM representations, and we use this signal to guide feature allocation during training. Experiments across ten language models demonstrate that this complexity-driven adaptation outperforms fixed-sparsity approaches on reconstruction fidelity, explained variance, cosine similarity and interpretability metrics while eliminating the burden of extensive hyperparameter tuning. Our code is available at: https://github.com/hiyukie/adaptiveK.},
}
Markdown (Informal)
[AdaptiveK: Complexity-Driven Sparse Autoencoders for Interpretable Language Model Representations](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1187/) (Yao et al., Findings 2026)
ACL