@inproceedings{meem-etal-2024-pat,
title = "{PAT}-Questions: A Self-Updating Benchmark for Present-Anchored Temporal Question-Answering",
author = "Meem, Jannat and
Rashid, Muhammad and
Dong, Yue and
Hristidis, Vagelis",
editor = "Ku, Lun-Wei and
Martins, Andre and
Srikumar, Vivek",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2024",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2024.findings-acl.777/",
doi = "10.18653/v1/2024.findings-acl.777",
pages = "13129--13148",
    abstract = "Existing work on Temporal Question Answering (TQA) has predominantly focused on questions anchored to specific timestamps or events (e.g. {\textquoteleft}Who was the US president in 1970?'). Little work has studied questions whose temporal context is relative to the present time (e.g. {\textquoteleft}Who was the previous US president?'). We refer to this problem as Present-Anchored Temporal QA (PATQA). PATQA poses unique challenges: (1) large language models (LLMs) may have outdated knowledge, (2) complex temporal relationships (e.g. {\textquoteleft}before', {\textquoteleft}previous') are hard to reason about, (3) multi-hop reasoning may be required, and (4) the gold answers of benchmarks must be continuously updated. To address these challenges, we introduce the PAT-Questions benchmark, which includes single and multi-hop temporal questions. The answers in PAT-Questions can be automatically refreshed by re-running SPARQL queries on a knowledge graph, if available. We evaluate several state-of-the-art LLMs and a SOTA temporal reasoning model (TEMPREASON-T5) on PAT-Questions through direct prompting and retrieval-augmented generation (RAG). The results highlight the limitations of existing solutions in PATQA and motivate the need for new methods to improve PATQA reasoning capabilities."
}
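
The abstract's self-updating mechanism (refreshing gold answers by re-running SPARQL queries on a knowledge graph) can be sketched as follows. The snippet below is an illustrative assumption, not the authors' released pipeline: it re-resolves the present-anchored question "Who was the previous US president?" against the public Wikidata SPARQL endpoint, so the answer tracks the present whenever the script is re-run. The specific query, property IDs, and helper names are assumptions chosen for illustration.

```python
# Minimal sketch of the self-updating idea from the abstract: refresh a
# present-anchored gold answer by re-running a SPARQL query against a
# public knowledge graph (Wikidata here). Illustrative only; not the
# PAT-Questions authors' released code.
import requests

WIKIDATA_SPARQL = "https://query.wikidata.org/sparql"  # assumed endpoint choice

# "Who was the previous US president?" -- order office holders by term
# start date and take the second most recent entry.
QUERY = """
SELECT ?personLabel ?start WHERE {
  ?person p:P39 ?stmt .
  ?stmt ps:P39 wd:Q11696 ;   # position held: President of the United States
        pq:P580 ?start .      # start time of the term
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
ORDER BY DESC(?start)
LIMIT 2
"""

def refresh_answer() -> str:
    """Re-run the SPARQL query so the gold answer reflects the current date."""
    resp = requests.get(
        WIKIDATA_SPARQL,
        params={"query": QUERY, "format": "json"},
        headers={"User-Agent": "pat-questions-refresh-sketch/0.1"},
        timeout=30,
    )
    resp.raise_for_status()
    rows = resp.json()["results"]["bindings"]
    # rows[0] is the current officeholder; rows[1] is the previous one.
    return rows[1]["personLabel"]["value"]

if __name__ == "__main__":
    print(refresh_answer())
```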