@inproceedings{liu-etal-2025-assistant,
    title = "Assistant-Guided Mitigation of Teacher Preference Bias in {LLM}-as-a-Judge",
    author = "Liu, Zhuo and
      Li, Moxin and
      Deng, Xun and
      Wang, Qifan and
      Feng, Fuli",
    editor = "Christodoulopoulos, Christos and
      Chakraborty, Tanmoy and
      Rose, Carolyn and
      Peng, Violet",
    booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
    month = nov,
    year = "2025",
    address = "Suzhou, China",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.findings-emnlp.510/",
    doi = "10.18653/v1/2025.findings-emnlp.510",
    pages = "9610--9631",
    isbn = "979-8-89176-335-7",
    abstract = "LLM-as-a-Judge employs large language models (LLMs), such as GPT-4, to evaluate the quality of LLM-generated responses, gaining popularity for its cost-effectiveness and strong alignment with human evaluations. However, training proxy judge models using evaluation data generated by powerful teacher models introduces a critical yet previously overlooked issue: teacher preference bias, where the proxy judge model learns a biased preference for responses from the teacher model. To tackle this problem, we propose a novel setting that incorporates an additional assistant model, which is not biased toward the teacher model{'}s responses, to complement the training data. Building on this setup, we introduce AGDe-Judge, a three-stage framework designed to debias from both the labels and feedbacks in the training data. Extensive experiments demonstrate that AGDe-Judge effectively reduces teacher preference bias while maintaining strong performance across six evaluation benchmarks."
}
[Assistant-Guided Mitigation of Teacher Preference Bias in LLM-as-a-Judge](https://aclanthology.org/2025.findings-emnlp.510/) (Liu et al., Findings 2025)
ACL