@comment{NOTE(review): the url field below uses the preview.aclanthology.org host
  with an ingest-acl-workshops path segment, which looks like a staging link.
  Confirm the permanent URL (or a DOI) before relying on this entry.}
@inproceedings{zhen-etal-2026-travelbehaviorqa,
  title     = {{TravelBehaviorQA}: A Benchmark Dataset for Behavioral Interpretation of {GPS} Trajectories},
  author    = {Zhen, Dongyang and
               Duan, Niping and
               Zhou, Huan and
               Cui, Qingbin},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.findings-acl.1604/},
  pages     = {32053--32071},
  isbn      = {979-8-89176-395-1},
  abstract  = {GPS trajectories encode rich behavioral information about how people move, organize activities, and form daily routines. Recent advances in large language models (LLMs) raise a natural question: can such models infer and summarize travel behavior directly from mobility traces? This paper introduces TravelBehaviorQA, a large-scale benchmark dataset that reframes trajectory analysis as a language-based behavioral understanding task. The dataset links raw GPS trajectories with human-grounded question-answering (QA) pairs that capture travel intensity, temporal structure, activity patterns, mode usage, and behavioral routines. Unlike prior mobility datasets focused on prediction or classification, TravelBehaviorQA emphasizes semantic interpretation through a unified mix of deterministic and open-ended questions. In this benchmark, we construct over 143k QA instances spanning users and years, and evaluate a broad range of state-of-the-art LLMs under controlled settings. Our results reveal substantial gaps between factual extraction and genuine behavioral reasoning, showing that model scale alone is insufficient and that trajectory representation is a primary bottleneck. TravelBehaviorQA exposes critical limitations of current models and establishes a rigorous benchmark for advancing language-based understanding of human mobility behavior.},
}
Markdown (Informal)
[TravelBehaviorQA: A Benchmark Dataset for Behavioral Interpretation of GPS Trajectories](https://preview.aclanthology.org/ingest-acl-workshops/2026.findings-acl.1604/) (Zhen et al., Findings 2026)
ACL