@inproceedings{zhao-etal-2025-unmasking,
  title     = {Unmasking Style Sensitivity: A Causal Analysis of Bias Evaluation Instability in Large Language Models},
  author    = {Zhao, Jiaxu and
               Fang, Meng and
               Zhang, Kun and
               Pechenizkiy, Mykola},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.acl-long.796/},
  pages     = {16314--16338},
  isbn      = {979-8-89176-251-0},
  abstract  = {Natural language processing applications are increasingly prevalent, but social biases in their outputs remain a critical challenge. While various bias evaluation methods have been proposed, these assessments show unexpected instability when input texts undergo minor stylistic changes. This paper conducts a comprehensive analysis of how different style transformations impact bias evaluation results across multiple language models and bias types using causal inference techniques. Our findings reveal that formality transformations significantly affect bias scores, with informal style showing substantial bias reductions (up to 8.33{\%} in LLaMA-2-13B). We identify appearance bias, sexual orientation bias, and religious bias as most susceptible to style changes, with variations exceeding 20{\%}. Larger models demonstrate greater sensitivity to stylistic variations, with bias measurements fluctuating up to 3.1{\%} more than in smaller models. These results highlight critical limitations in current bias evaluation methods and emphasize the need for reliable and fair assessments of language models.},
}
Markdown (Informal)
[Unmasking Style Sensitivity: A Causal Analysis of Bias Evaluation Instability in Large Language Models](https://aclanthology.org/2025.acl-long.796/) (Zhao et al., ACL 2025)
ACL