% LongMP-Bench paper, Findings of ACL 2026 (conference paper entry).
% NOTE(review): the url field points at a preview/ingest host; confirm whether
% a canonical, stable URL (or a DOI) should replace it once one is available.
% NOTE(review): address carries the location exactly as exported; it looks like
% the conference venue rather than the publisher's city -- confirm against the
% target style's expectations before changing it.
@inproceedings{li-etal-2026-longmp,
  title     = {{LongMP-Bench}: A Benchmark for Multimodal Persona Understanding in Long-Term Dialogues},
  author    = {Li, Zhuoqun and
               Huang, Zhaopei and
               Wang, Wenxuan and
               Jin, Qin},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1159/},
  pages     = {23132--23160},
  isbn      = {979-8-89176-395-1},
  abstract  = {Understanding multimodal user personas in long-term dialogues is essential for building personalized and human-like dialogue systems. However, existing datasets suffer from limited persona diversity and static, overly simplified settings, making them insufficient for capturing the complexity of real-world interactions. To address these limitations, we introduce LongMP-Bench, a benchmark designed to evaluate the capabilities of models in understanding evolving user personas within long-term multimodal dialogues. We present a multi-step, scalable data construction pipeline that generates long-term interaction records centered around multimodal personas, followed by human refinement for quality assurance. The resulting dataset contains long conversations from 150 users, each exhibiting visual consistency and dynamic persona development over time. Built on this dataset, we define a suite of tasks to comprehensively assess models' ability to track persona evolution, integrate visual and textual inputs, and apply persona understanding in realistic dialogue scenarios. Extensive experiments on LongMP-Bench highlight the substantial challenges in multimodal persona understanding, especially in tracking persona shifts and leveraging multimodal context effectively. We will release our benchmark and code to facilitate future research in multimodal and personalized dialogue systems.},
}
Markdown (Informal)
[LongMP-Bench: A Benchmark for Multimodal Persona Understanding in Long-Term Dialogues](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1159/) (Li et al., Findings 2026)
ACL