@inproceedings{demir-etal-2026-timeres,
  title     = {{TimeRes}: A {Turkish} Benchmark For Evaluating Temporal Understanding of Large Language Models},
  author    = {Demir, Habib Ya{\u{g}}{\i}z and
               Atlamaz, {\"U}mit and
               {\"U}sk{\"u}darl{\i}, Susan},
  editor    = {Baez Santamaria, Selene and
               Somayajula, Sai Ashish and
               Yamaguchi, Atsuki},
  booktitle = {Proceedings of the 19th Conference of the {European} Chapter of the {Association} for {Computational} {Linguistics} (Volume 4: Student Research Workshop)},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-eacl/2026.eacl-srw.67/},
  pages     = {910--920},
  isbn      = {979-8-89176-383-8},
  abstract  = {Temporal information is an essential part of communication, and understanding language requires processing it effectively. Despite recent advances, Large Language Models (LLMs) still struggle with temporal understanding. Existing benchmarks primarily focus on English and underexplore how linguistic structure contributes to temporal meaning. As a result, temporal understanding in languages other than English remains largely understudied. In this paper, we introduce TimeRes, a Turkish benchmark for evaluating temporal understanding of LLMs. TimeRes aims to investigate comprehension of Reichenbach{'}s temporal points and reported speech through date arithmetic. Our dataset includes 4,600 questions across 4 tasks at two levels of complexity, and presents a paired question formulation to distinguish temporal discourse understanding from temporal arithmetic capabilities. We evaluated six LLMs, and demonstrated that models struggle to resolve reported speech and fail to generalize across word order variations.}
}
Markdown (Informal)
[TimeRes: A Turkish Benchmark For Evaluating Temporal Understanding of Large Language Models](https://preview.aclanthology.org/ingest-eacl/2026.eacl-srw.67/) (Demir et al., EACL 2026)
ACL