% EMNLP 2025 paper, ACL Anthology ID 2025.emnlp-main.654.
@inproceedings{pucci-ranaldi-2025-advancing,
  title     = {Advancing Oversight Reasoning across Languages for Audit Sycophantic Behaviour via {X-Agent}},
  author    = {Pucci, Giulia and
               Ranaldi, Leonardo},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.emnlp-main.654/},
  pages     = {12960--12976},
  isbn      = {979-8-89176-332-6},
  abstract  = {Large language models (LLMs) have demonstrated capabilities that are highly satisfactory to a wide range of users by adapting to their culture and wisdom. Yet, this can translate into a propensity to produce responses that align with users' viewpoints, even when the latter are wrong. This behaviour is known as sycophancy, the tendency of LLMs to generate misleading responses as long as they align with the user's, inducing bias and reducing reliability. To make interactions consistent, reliable and safe, we introduce X-Agent, an Oversight Reasoning framework that audits human--LLM dialogues, reasons about them, captures sycophancy and corrects the final outputs. Concretely, X-Agent extends debate-based frameworks by (i) auditing human-LLM conversations, (ii) applying a defence layer that steers model behaviour and goes beyond user beliefs, and (iii) extracting reasoning traces from evaluations that serve as training signals for mitigating sycophancy. We evaluate X-Agent across diverse scenarios and languages, showing that it consistently detects sycophancy, reduces unwarranted agreement, and improves cross-turn consistency, advancing a reasoning-as-overview paradigm for safer LLM interaction. Our approach introduces a novel paradigm in which reasoning is not merely a means to solve problems, but as a mechanism for overseeing the problem-solving processes of other models.},
}
Markdown (Informal)
[Advancing Oversight Reasoning across Languages for Audit Sycophantic Behaviour via X-Agent](https://aclanthology.org/2025.emnlp-main.654/) (Pucci & Ranaldi, EMNLP 2025)
ACL