@comment{
  ACL 2026 long paper (Volume 1), entry 2026.acl-long.687.
  NOTE(review): the url field points at a preview host path ("ingest-acl");
  confirm the permanent URL before relying on this link.
}
@inproceedings{zhao-etal-2026-discovering,
  title     = {Discovering and Causally Validating Emotion-Sensitive Neurons in Large Audio-Language Models},
  author    = {Zhao, Xiutian and
               Schuller, Bj{\"o}rn and
               Sisman, Berrak},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-long.687/},
  pages     = {15056--15071},
  isbn      = {979-8-89176-390-6},
  abstract  = {Emotion is a central dimension of spoken communication, yet, we still lack a mechanistic account of how modern large audio-language models (LALMs) encode it internally. We present the first neuron-level interpretability study of emotion-sensitive neurons (ESNs) in LALMs and provide causal evidence supporting the existence of such units in Qwen2.5-Omni, Kimi-Audio, and Audio Flamingo 3. Across these three widely used open-source models, we compare frequency-, entropy-, mean-deviation-, and contrast-based neuron selectors on multiple emotion recognition benchmarks. Using inference-time interventions, we reveal a consistent emotion-specific signature: deactivating neurons selected for a given emotion disproportionately degrades recognition of that emotion while largely preserving other classes, whereas targeted steering amplifies these units to bias predictions toward the target emotion. These effects arise with modest amounts of identification data and scale systematically with intervention strength. We further observe that ESNs exhibit non-uniform layer-wise clustering with partial cross-dataset transfer. Taken together, our results offer a causal, neuron-level account of emotion decisions in LALMs and highlight targeted neuron interventions as an actionable handle for controllable affective behaviors.},
}
Markdown (Informal)
[Discovering and Causally Validating Emotion-Sensitive Neurons in Large Audio-Language Models](https://preview.aclanthology.org/ingest-acl/2026.acl-long.687/) (Zhao et al., ACL 2026)
ACL