% Findings of NAACL 2025 paper studying sycophancy in language-model
% uncertainty estimation (proposes the SyRoUP algorithm; see abstract).
% The url field uses the canonical aclanthology.org address; branch preview
% hosts of the Anthology are not stable citation targets.
% NOTE(review): this record carries no doi field -- add one if the Anthology
% lists a DOI for this paper.
@inproceedings{sicilia-etal-2025-accounting,
  title     = {Accounting for Sycophancy in Language Model Uncertainty Estimation},
  author    = {Sicilia, Anthony and
               Inan, Mert and
               Alikhani, Malihe},
  editor    = {Chiruzzo, Luis and
               Ritter, Alan and
               Wang, Lu},
  booktitle = {Findings of the Association for Computational Linguistics: {NAACL} 2025},
  month     = apr,
  year      = {2025},
  address   = {Albuquerque, New Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-naacl.438/},
  pages     = {7851--7866},
  isbn      = {979-8-89176-195-7},
  abstract  = {Effective human-machine collaboration requires machine learning models to externalize uncertainty, so users can reflect and intervene when necessary. For language models, these representations of uncertainty may be impacted by sycophancy bias: proclivity to agree with users, even if they are wrong. For instance, models may be over-confident in (incorrect) problem solutions suggested by a user. We study the relationship between sycophancy and uncertainty estimation for the first time. We propose a generalization of the definition of sycophancy bias to measure downstream impacts on uncertainty estimation, and also propose a new algorithm (SyRoUP) to account for sycophancy in the uncertainty estimation process. Unlike previous works, we study a broad array of user behaviors, varying both correctness and confidence of user suggestions to see how model answers (and their certainty) change. Our experiments across conversation forecasting and question-answering tasks show that user confidence plays a critical role in modulating the effects of sycophancy, and that SyRoUP can better predict these effects. From these results, we argue that externalizing both model \textit{and} user uncertainty can help to mitigate the impacts of sycophancy bias.},
}
Markdown (Informal)
[Accounting for Sycophancy in Language Model Uncertainty Estimation](https://aclanthology.org/2025.findings-naacl.438/) (Sicilia et al., Findings 2025)
ACL