% ACL Anthology record 2025.emnlp-main.1221 (EMNLP 2025 proceedings paper).
% The url field holds the canonical aclanthology.org address; the scraped
% export carried a preview.aclanthology.org "ingest-emnlp" address instead.
@inproceedings{ma-etal-2025-scalable,
  title     = {Scalable and Culturally Specific Stereotype Dataset Construction via Human-{LLM} Collaboration},
  author    = {Ma, Weicheng and
               Guerrerio, John J. and
               Vosoughi, Soroush},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.emnlp-main.1221/},
  pages     = {23939--23967},
  isbn      = {979-8-89176-332-6},
  abstract  = {Research on stereotypes in large language models (LLMs) has largely focused on English-speaking contexts, due to the lack of datasets in other languages and the high cost of manual annotation in underrepresented cultures. To address this gap, we introduce a cost-efficient human-LLM collaborative annotation framework and apply it to construct \textit{EspanStereo}, a Spanish-language stereotype dataset spanning multiple Spanish-speaking countries across Europe and Latin America. EspanStereo captures both well-documented stereotypes from prior literature and culturally specific biases absent from English-centric resources. Using LLMs to generate candidate stereotypes and in-culture annotators to validate them, we demonstrate the framework{'}s effectiveness in identifying nuanced, region-specific biases. Our evaluation of Spanish-supporting LLMs using EspanStereo reveals significant variation in stereotypical behavior across countries, highlighting the need for more culturally grounded assessments. Beyond Spanish, our framework is adaptable to other languages and regions, offering a scalable path toward multilingual stereotype benchmarks. This work broadens the scope of stereotype analysis in LLMs and lays the groundwork for comprehensive cross-cultural bias evaluation.},
}
Markdown (Informal)
[Scalable and Culturally Specific Stereotype Dataset Construction via Human-LLM Collaboration](https://aclanthology.org/2025.emnlp-main.1221/) (Ma et al., EMNLP 2025)
ACL