% Workshop paper introducing the SlayQA benchmark (GenBench 2024 proceedings).
% - url is the canonical ACL Anthology page for paper ID 2024.genbench-1.3
%   (same ID as the DOI suffix), not a temporary preview build.
% - address presumably holds the event location rather than a publisher
%   city; it is kept as exported so no information is lost. TODO confirm
%   against the target style if a publisher location is expected.
% - abstract is reproduced verbatim from the export.
@inproceedings{bunzeck-zarriess-2024-slayqa,
  author    = {Bunzeck, Bastian and
               Zarrie{\ss}, Sina},
  title     = {The {SlayQA} benchmark of social reasoning: testing gender-inclusive generalization with neopronouns},
  editor    = {Hupkes, Dieuwke and
               Dankers, Verna and
               Batsuren, Khuyagbaatar and
               Kazemnejad, Amirhossein and
               Christodoulopoulos, Christos and
               Giulianelli, Mario and
               Cotterell, Ryan},
  booktitle = {Proceedings of the 2nd {GenBench} Workshop on Generalisation (Benchmarking) in {NLP}},
  month     = nov,
  year      = {2024},
  address   = {Miami, Florida, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {42--53},
  doi       = {10.18653/v1/2024.genbench-1.3},
  url       = {https://aclanthology.org/2024.genbench-1.3/},
  abstract  = {We introduce SlayQA, a novel benchmark data set designed to evaluate language models' ability to handle gender-inclusive language, specifically the use of neopronouns, in a question-answering setting. Derived from the Social IQa data set, SlayQA modifies context-question-answer triples to include gender-neutral pronouns, creating a significant linguistic distribution shift in comparison to common pre-training corpora like C4 or Dolma. Our results show that state-of-the-art language models struggle with the challenge, exhibiting small, but noticeable performance drops when answering question containing neopronouns compared to those without.},
}
Markdown (Informal)
[The SlayQA benchmark of social reasoning: testing gender-inclusive generalization with neopronouns](https://aclanthology.org/2024.genbench-1.3/) (Bunzeck & Zarrieß, GenBench 2024)
ACL