@comment{Findings of ACL 2026 paper. The accent in the second author's surname is written as a braced accent command so that BibTeX treats it as a single letter for sorting and labels.}
@inproceedings{sastre-ros-a-2026-concept,
  title     = {Concept Tokens: Learning Behavioral Embeddings Through Concept Definitions},
  author    = {Sastre, Ignacio and
               Ros{\'a}, Aiala},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1319/},
  pages     = {26501--26518},
  isbn      = {979-8-89176-395-1},
  abstract  = {We propose Concept Tokens, a lightweight method that adds a new special token to a pretrained LLM and learns only its embedding from multiple natural language definitions of a target concept, where occurrences of the concept are replaced by the new token. The LLM is kept frozen and the embedding is optimized with the standard language-modeling objective. We evaluate Concept Tokens in three settings. First, we study hallucinations in closed-book question answering on HotpotQA and find a directional effect: negating the hallucination token reduces hallucinated answers mainly by increasing abstentions, whereas asserting it increases hallucinations and lowers precision. Second, we induce recasting, a pedagogical feedback strategy for second language teaching, and observe the same directional effect. Moreover, compared to providing the full definitional corpus in-context, concept tokens better preserve compliance with other instructions (e.g., asking follow-up questions). Finally, we include a qualitative study with the Eiffel Tower and a fictional ``Austral Tower'' to illustrate what information the learned embeddings capture and where their limitations emerge. Overall, Concept Tokens provide a compact control signal learned from definitions that can steer behavior in frozen LLMs.},
}
Markdown (Informal)
[Concept Tokens: Learning Behavioral Embeddings Through Concept Definitions](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1319/) (Sastre & Rosá, Findings 2026)
ACL