% Conference paper by Yao and Anand in the SCiL 2026 proceedings.
% NOTE(review): the url field points at a preview.aclanthology.org ingest build;
% confirm and switch to the canonical Anthology URL once it is published.
@inproceedings{yao-anand-2026-probing,
  title     = {Probing the Attention Representation of Filler-Gap Dependency in Transformers},
  author    = {Yao, Ruoqing and
               Anand, Pranav},
  editor    = {Voigt, Rob and
               Warstadt, Alex and
               Feldman, Naomi and
               Linzen, Tal},
  booktitle = {Proceedings of the Society for Computation in Linguistics 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, CA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.scil-main.23/},
  pages     = {258--261},
  isbn      = {979-8-89176-412-5},
  abstract  = {Prior work (Wilcox et al., 2024; Kobzeva et al., 2025) shows that neural language models exhibit filled-gap and unlicensed-gap effects, yet these effects attenuate with intervening clauses, especially with intervening overt complementizers. We conduct attention probing experiments on GPT-2 and identify two specific heads (layer 5, head 2, and layer 8, head 9) whose verb-to-filler attention correlates with filled-gap surprisal. The two heads are sensitive to clausal intervention but not to linear distance, and they show distinct patterns in islands. When intervening overt complementizers appear, head 2 of layer 5{'}s attention redistributes from the filler to the nearest complementizer, producing an ``attend-closest-C'' pattern, while head 9 of layer 8 does not. These results may suggest that LMs may have allocated distinct linguistically meaningful representations from the training data to individual attention heads, but they fail to fully learn the correct grammars of FGDs.},
}
Markdown (Informal)
[Probing the Attention Representation of Filler-Gap Dependency in Transformers](https://preview.aclanthology.org/ingest-acl-workshops/2026.scil-main.23/) (Yao & Anand, SCiL 2026)
ACL