@comment{Conference paper in the Findings of ACL 2025 volume. The url field uses the canonical ACL Anthology landing page derived from the Anthology ID in the doi field, not a temporary preview branch.}
@inproceedings{oguz-etal-2025-un,
    title     = {Un-considering Contextual Information: Assessing {LLM}s' Understanding of Indexical Elements},
    author    = {O{\u{g}}uz, Metehan and
                 Bakman, Yavuz Faruk and
                 Yaldiz, Duygu Nur},
    editor    = {Che, Wanxiang and
                 Nabende, Joyce and
                 Shutova, Ekaterina and
                 Pilehvar, Mohammad Taher},
    booktitle = {Findings of the Association for Computational Linguistics: ACL 2025},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.findings-acl.1203/},
    doi       = {10.18653/v1/2025.findings-acl.1203},
    pages     = {23410--23427},
    isbn      = {979-8-89176-256-5},
    abstract  = {Large Language Models (LLMs) have demonstrated impressive performances in tasks related to coreference resolution. However, previous studies mostly assessed LLM performance on coreference resolution with nouns and third person pronouns. This study evaluates LLM performance on coreference resolution with indexical like I, you, here and tomorrow which come with unique challenges due to their linguistic properties. We present the first study examining how LLMs interpret indexicals in English, releasing the English Indexical Dataset with 1600 multiple-choice questions. We evaluate pioneering LLMs, including GPT-4o, Claude 3.5 Sonnet, Gemini 1.5 Pro, and DeepSeek V3. Our results reveal that LLMs exhibit an impressive performance with some indexicals (I), while struggling with others (you, here, tomorrow), and that syntactic cues (e.g. quotation) contribute to LLM performance with some indexicals, while they reduce performance with others. Code and data are available at: https://github.com/metehanoguzz/LLMs-Indexicals-English},
}
Markdown (Informal)
[Un-considering Contextual Information: Assessing LLMs’ Understanding of Indexical Elements](https://aclanthology.org/2025.findings-acl.1203/) (Oğuz et al., Findings 2025)
ACL