@inproceedings{ryu-2023-plausibility,
title = "Plausibility Processing in Transformer Language Models: Focusing on the Role of Attention Heads in {GPT}",
author = "Ryu, Soo",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2023.findings-emnlp.27/",
doi = "10.18653/v1/2023.findings-emnlp.27",
pages = "356--369",
abstract = "The goal of this paper is to explore how Transformer language models process semantic knowledge, especially regarding the plausibility of noun-verb relations. First, I demonstrate GPT2 exhibits a higher degree of similarity with humans in plausibility processing compared to other Transformer language models. Next, I delve into how knowledge of plausibility is contained within attention heads of GPT2 and how these heads causally contribute to GPT2{'}s plausibility processing ability. Through several experiments, it was found that: i) GPT2 has a number of attention heads that detect plausible noun-verb relationships; ii) these heads collectively contribute to the Transformer{'}s ability to process plausibility, albeit to varying degrees; and iii) attention heads' individual performance in detecting plausibility does not necessarily correlate with how much they contribute to GPT2{'}s plausibility processing ability."
}