@inproceedings{gu-etal-2025-umad,
    title     = "{UMAD}: Enhancing {LLM} Debiasing via Multi-Agent Debate and Token-Level Bias Interpretation",
    author    = "Gu, Hanwen and
      Ma, Jie and
      Qin, Ying and
      Hu, Ling",
    editor    = "Sun, Maosong and
      Duan, Peiyong and
      Liu, Zhiyuan and
      Xu, Ruifeng and
      Sun, Weiwei",
    booktitle = "Proceedings of the 24th {C}hina National Conference on Computational Linguistics ({CCL} 2025)",
    month     = aug,
    year      = "2025",
    address   = "Jinan, China",
    publisher = "Chinese Information Processing Society of China",
    url       = "https://preview.aclanthology.org/ingest-ccl/2025.ccl-1.81/",
    pages     = "1078--1094",
    abstract  = "Textual data often contain biases that compromise fairness in AI systems, particularly in sensitive areas such as gender, race, and politics. While large language models (LLMs) have shown success across various tasks, they still face limitations due to inherent biases within the models and restrictive safety policies that hinder direct bias mitigation. To overcome these challenges, we propose UMAD (Unsupervised Multi-Agent Debate), a novel framework that leverages a Multi-Agent Debate mechanism alongside Best-Worst Scaling (BWS) to foster more effective discussions among LLMs, facilitating the identification of biases. By combining this with gradient-based interpretation techniques, UMAD extracts token-level bias insights, which are then integrated into models using in-context learning. This enhances the debiasing performance, as shown by our experiments across three bias categories{---}gender, religion, and politics{---}using five different LLMs. Our approach demonstrates significant improvements in metrics, with large models matching or even surpassing GPT-4 in Style Accuracy (STA). We release our code at: https://github.com/Couen/UMAD.git."
}
@comment{Informal Markdown citation (from ACL Anthology page):
[UMAD: Enhancing LLM Debiasing via Multi-Agent Debate and Token-Level Bias Interpretation](https://preview.aclanthology.org/ingest-ccl/2025.ccl-1.81/) (Gu et al., CCL 2025)
ACL
}