@inproceedings{bhatia-etal-2025-datelogicqa,
    title     = {{DateLogicQA}: Benchmarking Temporal Biases in Large Language Models},
    author    = {Bhatia, Gagan and
                 Tang, Ming Ze and
                 Mahanta, Cristina and
                 Kazi, Madiha},
    editor    = {Ebrahimi, Abteen and
                 Haider, Samar and
                 Liu, Emmy and
                 Haider, Sammar and
                 Leonor Pacheco, Maria and
                 Wein, Shira},
    booktitle = {Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 4: Student Research Workshop)},
    month     = apr,
    year      = {2025},
    address   = {Albuquerque, USA},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.naacl-srw.32/},
    pages     = {321--332},
    isbn      = {979-8-89176-192-6},
    internal-note = {NOTE(review): editor list contains both "Haider, Samar" and "Haider, Sammar", matching the official ACL Anthology record -- verify these are distinct people, not a data-entry duplicate},
    abstract  = {We introduce DateLogicQA, a human-curated benchmark of 190 questions specifically designed to understand temporal bias in Large Language Models (LLMs). Covering seven date formats across past, present, and future contexts, DateLogicQA examines four reasoning types: commonsense, factual, conceptual, and numerical. Through human-led evaluations of 12 state-of-the-art LLMs, we identify Representation-Level Bias, arising from suboptimal embeddings that distort date semantics, and Logical-Level Bias, manifesting when correct date tokens yield flawed temporal reasoning. Our findings underscore persistent challenges in handling various date formats and temporal contexts, revealing the need for more robust pretraining data, targeted post-training methods, and precise tokenization strategies. By illuminating these biases, we provide actionable insights to guide the development of LLMs for accurate temporal reasoning across diverse real-world applications.}
}
Markdown (Informal)
[DateLogicQA: Benchmarking Temporal Biases in Large Language Models](https://aclanthology.org/2025.naacl-srw.32/) (Bhatia et al., NAACL 2025)
ACL
- Gagan Bhatia, Ming Ze Tang, Cristina Mahanta, and Madiha Kazi. 2025. DateLogicQA: Benchmarking Temporal Biases in Large Language Models. In Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 4: Student Research Workshop), pages 321–332, Albuquerque, USA. Association for Computational Linguistics.