% Conference paper in the ACL 2026 proceedings (Volume 1: Long Papers), pages 12065--12084.
% NOTE(review): the url field points at the Anthology preview/ingest host; confirm the
% canonical URL before relying on this entry in a final bibliography.
@inproceedings{emura-sugawara-2026-dual,
  author    = {Emura, Rei and
               Sugawara, Saku},
  title     = {A Dual-Task Paradigm to Investigate Sentence Comprehension Strategies in Language Models},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-long.552/},
  pages     = {12065--12084},
  isbn      = {979-8-89176-390-6},
  abstract  = {Language models (LMs) behave more like humans when their cognitive resources are restricted, particularly in predicting sentence processing costs such as reading times. However, it remains unclear whether such constraints similarly affect sentence comprehension strategies, and existing methods do not directly target the balance between memory storage and sentence processing, which is central to human working memory. To address this issue, we propose a dual-task paradigm that combines an arithmetic computation task with a sentence comprehension task, such as ``The 2 cocktail + blended 3 =...''. Our experiments show that under dual-task conditions, GPT-4o, o3-mini, and o4-mini shift toward plausibility-based comprehension, mirroring humans' rational inference. Specifically, these models show a greater accuracy gap between plausible sentences (e.g., ``The cocktail was blended by the bartender'') and implausible sentences (e.g., ``The bartender was blended by the cocktail'') in the dual-task condition compared to the single-task conditions. These findings suggest that constraints on the balance between memory and processing resources promote rational inference in LMs. More broadly, they support the view that human-like sentence comprehension fundamentally arises from the allocation of limited cognitive resources.},
}
Markdown (Informal)
[A Dual-Task Paradigm to Investigate Sentence Comprehension Strategies in Language Models](https://preview.aclanthology.org/ingest-acl/2026.acl-long.552/) (Emura & Sugawara, ACL 2026)
ACL