@comment{
  Conference paper from the *SEM 2026 proceedings, published by the
  Association for Computational Linguistics.
  NOTE(review): the url field below points at a "preview ... ingest" path,
  which looks like a staging location; presumably it should be replaced by
  the canonical Anthology URL (or a DOI) once the volume is published --
  confirm before relying on it.
}
@inproceedings{vuth-etal-2026-latents,
  title     = {From Latents to Labels: Zero-Shot Named Entity Recognition using Sparse Autoencoder Features},
  author    = {Vuth, Nakanyseth and
               S{\'e}rasset, Gilles and
               Schwab, Didier},
  editor    = {Mohammad, Saif M. and
               Ousidhoum, Nedjma},
  booktitle = {Proceedings of the 15th Joint Conference on Lexical and Computational Semantics (*{SEM} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.starsem-conference.11/},
  pages     = {164--177},
  isbn      = {979-8-89176-413-2},
  abstract  = {Zero-shot Named Entity Recognition is critical for low-resource domains, yet existing approaches rely on opaque prompting of large language models or dense representations that suffer from polysemanticity. We propose an alternative approach that leverages monosemantic features of Sparse Autoencoders. We introduce SAE-NER, a training-free framework that maps monosemantic SAE feature activations to entity types through direct precision estimation, requiring no supervision or prompting. Experiments across general and biomedical domains show that SAE-NER consistently outperforms trained probing classifiers, with especially a large margin in the biomedical domain (up to +20 F1). Finally, we evaluate the utility of SAE-NER predictions as silver training data for downstream NER models. Using controlled perturbations of gold annotations to simulate realistic annotation noise, we show that false negatives are the primary bottleneck for silver-data quality, outweighing the impact of boundary imprecision and false positives.},
}

Markdown (Informal)
[From Latents to Labels: Zero-Shot Named Entity Recognition using Sparse Autoencoder Features](https://preview.aclanthology.org/ingest-acl-workshops/2026.starsem-conference.11/) (Vuth et al., *SEM 2026)
ACL