% Conference paper: HONEST (Nozza, Bianchi, Hovy), NAACL-HLT 2021 main proceedings.
% The url field uses the canonical ACL Anthology landing page; the doi field is
% the persistent identifier and should be preferred by styles that support it.
% NOTE(review): editor "Hakkani-Tur" may need the accented form (u-umlaut) -- confirm
% against the proceedings front matter before changing.
@inproceedings{nozza-etal-2021-honest,
  title     = {{HONEST}: Measuring Hurtful Sentence Completion in Language Models},
  author    = {Nozza, Debora and
               Bianchi, Federico and
               Hovy, Dirk},
  editor    = {Toutanova, Kristina and
               Rumshisky, Anna and
               Zettlemoyer, Luke and
               Hakkani-Tur, Dilek and
               Beltagy, Iz and
               Bethard, Steven and
               Cotterell, Ryan and
               Chakraborty, Tanmoy and
               Zhou, Yichao},
  booktitle = {Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  month     = jun,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.naacl-main.191/},
  doi       = {10.18653/v1/2021.naacl-main.191},
  pages     = {2398--2406},
  abstract  = {Language models have revolutionized the field of NLP. However, language models capture and proliferate hurtful stereotypes, especially in text generation. Our results show that 4.3{\%} of the time, language models complete a sentence with a hurtful word. These cases are not random, but follow language and gender-specific patterns. We propose a score to measure hurtful sentence completions in language models (HONEST). It uses a systematic template- and lexicon-based bias evaluation methodology for six languages. Our findings suggest that these models replicate and amplify deep-seated societal stereotypes about gender roles. Sentence completions refer to sexual promiscuity when the target is female in 9{\%} of the time, and in 4{\%} to homosexuality when the target is male. The results raise questions about the use of these models in production settings.},
}
Markdown (Informal)
[HONEST: Measuring Hurtful Sentence Completion in Language Models](https://aclanthology.org/2021.naacl-main.191/) (Nozza et al., NAACL 2021)
ACL
- Debora Nozza, Federico Bianchi, and Dirk Hovy. 2021. HONEST: Measuring Hurtful Sentence Completion in Language Models. In Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pages 2398–2406, Online. Association for Computational Linguistics.