@inproceedings{vaidya-etal-2023-humans,
title = "Humans and language models diverge when predicting repeating text",
author = "Vaidya, Aditya and
Turek, Javier and
Huth, Alexander",
editor = "Jiang, Jing and
Reitter, David and
Deng, Shumin",
booktitle = "Proceedings of the 27th Conference on Computational Natural Language Learning (CoNLL)",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2023.conll-1.5/",
doi = "10.18653/v1/2023.conll-1.5",
pages = "58--69",
abstract = "Language models that are trained on the next-word prediction task have been shown to accurately model human behavior in word prediction and reading speed. In contrast with these findings, we present a scenario in which the performance of humans and LMs diverges. We collected a dataset of human next-word predictions for five stimuli that are formed by repeating spans of text. Human and GPT-2 LM predictions are strongly aligned in the first presentation of a text span, but their performance quickly diverges when memory (or in-context learning) begins to play a role. We traced the cause of this divergence to specific attention heads in a middle layer. Adding a power-law recency bias to these attention heads yielded a model that performs much more similarly to humans. We hope that this scenario will spur future work in bringing LMs closer to human behavior."
}