% Conference paper entry: Li, Cong and Francis, CoNLL 2026 proceedings.
% NOTE(review): the url field points at a preview.aclanthology.org ingestion
% build, which is presumably temporary; replace it with the canonical
% aclanthology.org URL once the volume is published (confirm the final ID).
@inproceedings{li-etal-2026-mechanistic,
  title     = "Mechanistic Interpretability of {Animacy} Effects on Structure Choice in {GPT}-2",
  author    = "Li, Yue and
               Cong, Yan and
               Francis, Elaine J.",
  editor    = "Bonial, Claire and
               Berzak, Yevgeni",
  booktitle = "Proceedings of the 30th Conference on Computational Natural Language Learning",
  month     = jul,
  year      = "2026",
  address   = "San Diego, California, USA",
  publisher = "Association for Computational Linguistics",
  url       = "https://preview.aclanthology.org/ingest-acl-workshops/2026.conll-main.38/",
  pages     = "626--640",
  isbn      = "979-8-89176-410-1",
  abstract  = "Language models (LMs) exhibit human-like behavior across linguistic tasks, yet behavioral similarity does not establish mechanistic correspondence. Animacy {---} whether an entity is alive and sentient {---} is a well-documented semantic feature shaping linguistic behavior in humans. Although LMs show animacy sensitivity behaviorally, the mechanistic basis remains unexplored. In this study, we probe GPT-2 Small{'}s internal circuitry to test whether animacy representations causally drive syntactic structure choice. Activation patching confirms causality: swapping animacy representations in the model shifts its downstream output. Critically, bidirectional patching reveals that animacy conditions differ in how strongly they commit to a structure: some animacy configurations resist perturbation and exert strong causal influence, while others remain flexible. We identify 22 attention heads mediating these effects, split between passive-promoting and passive-suppressing populations, suggesting GPT-2 Small{'}s structure choice likely emerges from internal competition between opposing heads. These findings provide mechanistic grounding for animacy effects documented in extensive psycholinguistics research and demonstrate how interpretability methods can enrich and test psycholinguistic theory."
}
Markdown (Informal)
[Mechanistic Interpretability of Animacy Effects on Structure Choice in GPT-2](https://preview.aclanthology.org/ingest-acl-workshops/2026.conll-main.38/) (Li et al., CoNLL 2026)
ACL