@inproceedings{hong-etal-2025-measuring,
  title     = {Measuring Sycophancy of Language Models in Multi-turn Dialogues},
  author    = {Hong, Jiseung and
               Byun, Grace and
               Kim, Seungone and
               Shu, Kai},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2025},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-emnlp.121/},
  doi       = {10.18653/v1/2025.findings-emnlp.121},
  pages     = {2239--2259},
  isbn      = {979-8-89176-335-7},
  abstract  = {Large Language Models (LLMs) are expected to provide helpful and harmless responses, yet they often exhibit \textit{sycophancy}{---}conforming to user beliefs regardless of factual accuracy or ethical soundness. Prior research on sycophancy has primarily focused on single-turn factual correctness, overlooking the dynamics of real-world interactions. In this work, we introduce \textbf{SYCON Bench} (\textbf{SY}cophantic \textbf{CON}formity benchmark), a novel evaluation suite that assesses sycophantic behavior in multi-turn, free-form conversational settings. Our benchmark measures how quickly a model conforms to the user (\textit{Turn of Flip}) and how frequently it shifts its stance under sustained user pressure (\textit{Number of Flip}). Applying SYCON Bench to 17 LLMs across three real-world scenarios, we find that sycophancy remains a prevalent failure mode. Our analysis shows that alignment tuning amplifies sycophantic behavior, whereas model scaling and reasoning optimization strengthen the model{'}s ability to resist undesirable user views. Reasoning models generally outperform instruction-tuned models but often fail when they over-index on logical exposition instead of directly addressing the user{'}s underlying beliefs. Finally, we evaluate four additional prompting strategies and demonstrate that adopting a third-person perspective reduces sycophancy by up to 63.8{\%} in debate scenario.},
}
% NOTE(review): the lines below are citation-widget text pasted in from the ACL
% Anthology page, not BibTeX. They sat fused to the entry's closing brace;
% preserved here outside any entry (BibTeX ignores text between entries), with
% the canonical URL substituted for the preview mirror.
% Markdown (Informal)
% [Measuring Sycophancy of Language Models in Multi-turn Dialogues](https://aclanthology.org/2025.findings-emnlp.121/) (Hong et al., Findings 2025)
% ACL