@inproceedings{koo-etal-2026-know,
title = "{I} Know, but {I} Don{'}t Know! How Persona Conflict Undermines Instruction Adherence in Large Language Models",
author = "Koo, Seonmin and
Kim, Jinsung and
Lim, Heuiseok",
editor = "Demberg, Vera and
Inui, Kentaro and
Marquez, Llu{\'i}s",
booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {EACL} 2026",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingest-eacl/2026.findings-eacl.24/",
pages = "465--489",
ISBN = "979-8-89176-386-9",
abstract = "Large Language Models (LLMs) are expected to generate appropriate responses while adhering to predefined prior constraints or knowledge, such as user personas, across various dialogue scenarios. However, real-world interactions frequently involve semantic conflicts between such prior information and actual user-provided inputs. Despite this, prior studies on persona-grounded dialogue{---}one of the representative tasks in personal preference modeling{---}have predominantly assumed idealized scenarios where persona and user utterances are fully aligned. To bridge this gap, we introduce and formalize the notion of persona conflict, wherein predefined personas contradict the personal information expressed by the user during interaction. We present a systematic verification framework to examine model behavior under such conflict scenarios. In detail, we propose a taxonomy that categorizes model behaviors into three distinct response types (adhering, sycophantic, and wavering) and develop a measurement schema grounded in this taxonomy. Our study provides a comprehensive analysis of the persona conflict phenomenon, identifying diverse key behavioral factors. Extensive experiments and in-depth analysis provide new insights into designing robust dialogue models capable of managing persona inconsistencies."
}