@inproceedings{wang-liu-2025-beyond,
    title = "Beyond Generation: Leveraging {LLM} Creativity to Overcome Label Bias in Classification",
    author = "Wang, Xiaoyue and
      Liu, Xin",
    editor = "Che, Wanxiang and
      Nabende, Joyce and
      Shutova, Ekaterina and
      Pilehvar, Mohammad Taher",
    booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
    month = jul,
    year = "2025",
    address = "Vienna, Austria",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.findings-acl.1307/",
    pages = "25500--25506",
    isbn = "979-8-89176-256-5",
    abstract = "Large Language Models (LLMs) exhibit impressive capabilities in In-Context Learning (ICL) but are prone to label bias{---}an undesirable tendency to favor certain answers. Existing calibration methods mitigate bias by leveraging in-domain data, yet such data is often unavailable in real-world scenarios. To address this limitation, we propose SDC (Synthetic Data Calibration), a simple-yet-effective approach that generates synthetic in-domain data from a few in-context demonstrations and utilizes it for calibration. By approximating the benefits of real in-domain data, SDC effectively reduces label bias without requiring access to actual domain-specific inputs. Experimental evaluations on 279 classification and multiple-choice tasks from the Super-NaturalInstructions benchmark. The results show that SDC significantly reduces label bias, achieving an average Bias Score reduction of 57.5{\%}, and outperforming all competitive baselines. Moreover, when combined with Leave-One-Out Calibration (LOOC), further improves performance, underscoring its effectiveness and generalizability in enhancing the reliability of LLMs."
}
Markdown (Informal)
[Beyond Generation: Leveraging LLM Creativity to Overcome Label Bias in Classification](https://aclanthology.org/2025.findings-acl.1307/) (Wang & Liu, Findings 2025)
ACL