% Findings of ACL 2026 paper introducing the SycoBench-600 benchmark.
% NOTE(review): the url field points at the Anthology preview/ingest host;
% confirm the canonical address once the volume is published.
@inproceedings{sinha-2026-sycobench,
  title     = {{SycoBench-600}: Measuring Sycophancy and Correction Selectivity in {LLM} Assistants},
  author    = {Sinha, Debu},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1759/},
  pages     = {35278--35284},
  isbn      = {979-8-89176-395-1},
  abstract  = {Modern instruction-following language models are optimized to be helpful and cooperative, often through preference-based alignment such as RLHF and related methods. A growing body of evidence shows that this training can also induce sycophancy: models may agree with a user even when the user is wrong, undermining reliability in decision support and high-stakes advice. We introduce SycoBench-600, a controlled multiple-choice benchmark that measures (i) susceptibility to three social-pressure perturbations (doubt, authority, and an explicit wrong suggestion) and (ii) correction selectivity, the ability to accept correct suggestions while resisting incorrect ones. The released benchmark contains 600 English MCQ instances over 272 normalized question stems, covers 8 domains and 3 difficulty tiers, and evaluates each instance under 3 fixed paraphrase variants of the perturbation prompts. We evaluate seven widely used assistants spanning proprietary and open-weight families. Results show substantial variation in pressure robustness and selective updating, and further show that willingness to update does not by itself imply selectivity. We release raw logs, validation scripts, and code that regenerates every table and figure from the model outputs.},
}
Markdown (Informal)
[SycoBench-600: Measuring Sycophancy and Correction Selectivity in LLM Assistants](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1759/) (Sinha, Findings 2026)
ACL