@inproceedings{lepori-etal-2025-racing,
  title     = {Racing Thoughts: Explaining Contextualization Errors in Large Language Models},
  author    = {Lepori, Michael A. and Mozer, Michael Curtis and Ghandeharioun, Asma},
  editor    = {Chiruzzo, Luis and Ritter, Alan and Wang, Lu},
  booktitle = {Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)},
  month     = apr,
  year      = {2025},
  address   = {Albuquerque, New Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.naacl-long.155/},
  pages     = {3020--3036},
  isbn      = {979-8-89176-189-6},
  abstract  = {The profound success of transformer-based language models can largely be attributed to their ability to integrate relevant contextual information from an input sequence in order to generate a response or complete a task. However, we know very little about the algorithms that a model employs to implement this capability, nor do we understand their failure modes. For example, given the prompt ``John is going fishing, so he walks over to the bank. Can he make an ATM transaction?'', a model may incorrectly respond ``Yes'' if it has not properly contextualized ``bank'' as a geographical feature, rather than a financial institution. We propose the LLM Race Conditions Hypothesis as an explanation of contextualization errors of this form. This hypothesis identifies dependencies between tokens (e.g., ``bank'' must be properly contextualized before the final token, ``?'', integrates information from ``bank''), and claims that contextualization errors are a result of violating these dependencies. Using a variety of techniques from mechanistic interpretability, we provide correlational and causal evidence in support of the hypothesis and suggest inference-time interventions to address it.},
}
Markdown (Informal)
[Racing Thoughts: Explaining Contextualization Errors in Large Language Models](https://aclanthology.org/2025.naacl-long.155/) (Lepori et al., NAACL 2025)
ACL
- Michael A. Lepori, Michael Curtis Mozer, and Asma Ghandeharioun. 2025. Racing Thoughts: Explaining Contextualization Errors in Large Language Models. In Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), pages 3020–3036, Albuquerque, New Mexico. Association for Computational Linguistics.