@inproceedings{clark-etal-2025-linear,
title = "Linear Recency Bias During Training Improves Transformers' Fit to Reading Times",
author = "Clark, Christian and
Oh, Byung-Doh and
Schuler, William",
editor = "Rambow, Owen and
Wanner, Leo and
Apidianaki, Marianna and
Al-Khalifa, Hend and
Eugenio, Barbara Di and
Schockaert, Steven",
booktitle = "Proceedings of the 31st International Conference on Computational Linguistics",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2025.coling-main.517/",
pages = "7735--7747",
    abstract = "Recent psycholinguistic research has compared human reading times to surprisal estimates from language models to study the factors shaping human sentence processing difficulty. Previous studies have shown a strong fit between surprisal values from Transformers and reading times. However, standard Transformers work with a lossless representation of the entire previous linguistic context, unlike models of human language processing that include memory decay. To bridge this gap, this paper evaluates a modification of the Transformer model that uses ALiBi (Press et al., 2022), a recency bias added to attention scores. Surprisal estimates from a Transformer that includes ALiBi during training and inference show an improved fit to human reading times compared to a standard Transformer baseline. A subsequent analysis of attention heads suggests that ALiBi's mixture of slopes{---}which determine the rate of memory decay in each attention head{---}may play a role in the improvement by helping models with ALiBi to track different kinds of linguistic dependencies."
}
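
For readers unfamiliar with the mechanism the abstract refers to, the sketch below illustrates how ALiBi's linear recency bias enters attention: each head adds a head-specific slope times the negative query-key distance to its pre-softmax scores, so more distant tokens are downweighted, at a different rate per head. This is a minimal NumPy sketch based on Press et al. (2022), not the authors' code; the function names (`alibi_slopes`, `causal_attention_with_alibi`) and the toy shapes are illustrative assumptions.

```python
# Minimal sketch of ALiBi's linear recency bias (after Press et al., 2022).
# Illustrative only; not the code used in the paper.

import numpy as np

def alibi_slopes(n_heads: int) -> np.ndarray:
    """Geometric sequence of slopes, e.g. 1/2, 1/4, ..., 1/256 for 8 heads."""
    return np.array([2.0 ** (-8.0 * (i + 1) / n_heads) for i in range(n_heads)])

def alibi_bias(seq_len: int, slopes: np.ndarray) -> np.ndarray:
    """bias[h, i, j] = -slope[h] * (i - j): larger penalty for more distant keys."""
    distance = np.arange(seq_len)[:, None] - np.arange(seq_len)[None, :]
    return -slopes[:, None, None] * distance[None, :, :]

def causal_attention_with_alibi(q, k, v, slopes):
    """q, k, v: [n_heads, seq_len, d_head]. Returns per-head attention output."""
    n_heads, seq_len, d_head = q.shape
    scores = q @ k.transpose(0, 2, 1) / np.sqrt(d_head)        # [h, i, j]
    scores = scores + alibi_bias(seq_len, slopes)               # linear recency bias
    future = np.triu(np.ones((seq_len, seq_len), dtype=bool), k=1)
    scores = np.where(future, -np.inf, scores)                  # causal masking
    weights = np.exp(scores - scores.max(axis=-1, keepdims=True))
    weights = weights / weights.sum(axis=-1, keepdims=True)     # softmax over keys
    return weights @ v

if __name__ == "__main__":
    rng = np.random.default_rng(0)
    h, t, d = 4, 6, 8
    q, k, v = (rng.standard_normal((h, t, d)) for _ in range(3))
    print(causal_attention_with_alibi(q, k, v, alibi_slopes(h)).shape)  # (4, 6, 8)
```

Because each head gets a different slope, some heads decay attention quickly (strongly recency-biased) while others retain a nearly flat bias; the paper's attention-head analysis suggests this mixture helps track different kinds of linguistic dependencies.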