@inproceedings{hida-etal-2025-social,
title = "Social Bias Evaluation for Large Language Models Requires Prompt Variations",
author = "Hida, Rem and
Kaneko, Masahiro and
Okazaki, Naoaki",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/author-page-yu-wang-polytechnic/2025.findings-emnlp.783/",
doi = "10.18653/v1/2025.findings-emnlp.783",
pages = "14507--14530",
ISBN = "979-8-89176-335-7",
abstract = "Warning: This paper contains examples of stereotypes and biases. Large Language Models (LLMs) exhibit considerable social biases, and various studies have tried to evaluate and mitigate these biases accurately. Previous studies use downstream tasks to examine the degree of social biases for evaluation and mitigation. While the output of LLMs highly depends on prompts, prior works evaluating and mitigating bias have often relied on a limited variety of prompts. In this paper, we investigate the sensitivity of LLMs when changing prompt variations (task instruction, few-shot examples, debias-prompt) by analyzing task performance and social bias of LLMs. Our experimental results reveal that LLM rankings fluctuate across prompts for both task performance and social bias. We also confirmed that the impact of format changes can differ for each bias category. Performance improvement from prompt settings may not result in reduced bias. Moreover, the ambiguity of instances is a common factor in LLM sensitivity to prompts across advanced LLMs. We recommend using diverse prompts, as in this study, to compare the effects of prompts on social bias in LLMs."
}