@inproceedings{burema-etal-2026-dutch,
  title     = {A {Dutch} Benchmark to Assess Social Bias in {LLMs} within a Hiring Decision Setting},
  author    = {Burema, Renate and
               Schuth, Anne and
               Spelt, Christopher and
               Nguyen, Dong},
  editor    = {Piperidis, Stelios and
               Bel, N{\'u}ria and
               van den Heuvel, Henk and
               Ide, Nancy and
               Krek, Simon and
               Toral, Antonio},
  booktitle = {International Conference on Language Resources and Evaluation},
  month     = may,
  year      = {2026},
  address   = {Palma de Mallorca, Spain},
  publisher = {ELRA Language Resource Association},
  url       = {https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.312/},
  pages     = {3932--3943},
  abstract  = {In this paper, we present a Dutch benchmark to assess whether large language models (LLMs) exhibit social biases in hiring decisions, focusing on gender and country of origin. We experiment with two approaches: explicit descriptions of the applicants' demographics and using first names as proxies. We evaluate both monolingual and multilingual LLMs and find that all tested models, gpt-4o-mini, claude-3.5-haiku, Geitje-7B-Ultra and EuroLLM-9B-Instruct, exhibit some degree of social bias in their decisions. Furthermore, all models tested are sensitive to the manner in which the prompts are written. We make our benchmark publicly available under an EUPL-1.2 license. The benchmark is available at https://github.com/MinBZK/llm-benchmark/tree/main/benchmarks/social-bias.},
}
@comment{Markdown (informal) citation for the entry above:
  [A Dutch Benchmark to Assess Social Bias in LLMs within a Hiring Decision Setting](https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.312/) (Burema et al., LREC 2026) -- ACL Anthology
}