@comment{
  Workshop paper by Avni Mittal in the TrustNLP 2026 proceedings
  (ACL Anthology ID 2026.trustnlp-main.33).
  Review notes: the editor surname "Galystan" was corrected to "Galstyan", and
  the url was changed from the temporary preview.aclanthology.org ingest branch
  to the canonical aclanthology.org form. Verify both against the published
  proceedings. The address field holds the event location, as exported.
}
@inproceedings{mittal-2026-forget,
  title     = {Did You Forget What {I} Asked? Prospective Memory Failures in Large Language Models},
  author    = {Mittal, Avni},
  editor    = {Chang, Kai-Wei and
               Mehrabi, Ninareh and
               Krishna, Satyapriya and
               Das, Anubrata and
               Dhamala, Jwala and
               Cao, Yang Trista and
               Kumarage, Tharindu and
               Ramakrishna, Anil and
               Christodoulopoulos, Christos and
               Wan, Yixin and
               Galstyan, Aram and
               Kumar, Anoop and
               Gupta, Rahul},
  booktitle = {Proceedings of the 6th Workshop on Trustworthy {NLP} ({T}rust{NLP} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.trustnlp-main.33/},
  pages     = {471--488},
  isbn      = {979-8-89176-418-7},
  abstract  = {Large language models often fail to satisfy formatting instructions when they must simultaneously perform demanding tasks. We study this behavior through a prospective memory-inspired lens from cognitive psychology, using a controlled paradigm that combines verifiable formatting constraints with benchmark tasks of increasing complexity. Across three model families and over 8,000 prompts, compliance drops by 2{--}21{\%} under concurrent task load. Vulnerability is highly type-dependent: terminal constraints (requiring action at the response boundary) degrade most, with drops up to 50{\%}, while avoidance constraints remain comparatively robust. A salience-enhanced format (explicit instruction framing plus a trailing reminder) recovers much of the lost compliance, restoring performance to 90{--}100{\%} in many settings. Interference is bidirectional: formatting constraints can also reduce task accuracy, with one model{'}s GSM8K accuracy dropping from 93{\%} to 27{\%}. In additional stacking experiments, joint compliance declines sharply as constraints accumulate. All results use deterministic programmatic checkers, with no LLM-as-judge component, on publicly available datasets.},
}
Markdown (Informal)
[Did You Forget What I Asked? Prospective Memory Failures in Large Language Models](https://aclanthology.org/2026.trustnlp-main.33/) (Mittal, TrustNLP 2026)
ACL