@inproceedings{sobhani-delany-2024-towards,
title = "Towards Fairer {NLP} Models: Handling Gender Bias In Classification Tasks",
author = "Sobhani, Nasim and
Delany, Sarah",
editor = "Fale{\'n}ska, Agnieszka and
Basta, Christine and
Costa-juss{\`a}, Marta and
Goldfarb-Tarrant, Seraphina and
Nozza, Debora",
booktitle = "Proceedings of the 5th Workshop on Gender Bias in Natural Language Processing (GeBNLP)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2024.gebnlp-1.10/",
doi = "10.18653/v1/2024.gebnlp-1.10",
pages = "167--178",
abstract = "Measuring and mitigating gender bias in natural language processing (NLP) systems is crucial to ensure fair and ethical AI. However, a key challenge is the lack of explicit gender information in many textual datasets. This paper proposes two techniques, Identity Term Sampling (ITS) and Identity Term Pattern Extraction (ITPE), as alternatives to template-based approaches for measuring gender bias in text data. These approaches identify test data for measuring gender bias in the dataset itself and can be used to measure gender bias on any NLP classifier. We demonstrate the use of these approaches for measuring gender bias across various NLP classification tasks, including hate speech detection, fake news identification, and sentiment analysis. Additionally, we show how these techniques can benefit gender bias mitigation, proposing a variant of Counterfactual Data Augmentation (CDA), called Gender-Selective CDA (GS-CDA), which reduces the amount of data augmentation required in training data while effectively mitigating gender bias and maintaining overall classification performance."
}
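
The abstract's Identity Term Sampling (ITS) and Identity Term Pattern Extraction (ITPE) both rest on the general idea of locating gendered identity terms in existing test data rather than relying on templates. Below is a minimal sketch of that underlying idea, assuming a small illustrative term lexicon and a `classifier` callable returning 0/1 labels; the names and the simple demographic-parity-style gap are assumptions for illustration, not the paper's actual ITS/ITPE procedures.

```python
# Minimal sketch of identity-term-based bias measurement: partition test
# instances by the gendered identity terms they contain, then compare a
# classifier's positive-prediction rates across the two subsets.
# The term lists, tokenization, and `classifier` interface are illustrative
# assumptions, not the paper's ITS/ITPE procedures.

MALE_TERMS = {"he", "him", "his", "man", "men", "boy", "boys"}
FEMALE_TERMS = {"she", "her", "hers", "woman", "women", "girl", "girls"}

def contains_any(text: str, terms: set) -> bool:
    """Check whether any (lightly normalized) token of `text` is in `terms`."""
    tokens = {tok.strip(".,;:!?").lower() for tok in text.split()}
    return bool(tokens & terms)

def positive_rate(texts, classifier) -> float:
    """Fraction of instances the classifier labels positive (label 1)."""
    if not texts:
        return 0.0
    return sum(classifier(t) for t in texts) / len(texts)

def gender_prediction_gap(texts, classifier) -> float:
    """Positive-rate difference between male-only and female-only instances,
    a simple demographic-parity-style gap."""
    male = [t for t in texts
            if contains_any(t, MALE_TERMS) and not contains_any(t, FEMALE_TERMS)]
    female = [t for t in texts
              if contains_any(t, FEMALE_TERMS) and not contains_any(t, MALE_TERMS)]
    return positive_rate(male, classifier) - positive_rate(female, classifier)
```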
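The mitigation side, Gender-Selective CDA (GS-CDA), is a variant of Counterfactual Data Augmentation, which in general creates training copies with gendered terms swapped. The following is a minimal sketch of that core gender-swap step only, assuming an illustrative word-pair list; it is not the authors' implementation, and GS-CDA's selective choice of which instances to augment is not shown.

```python
# Minimal sketch of the gender-swap step behind counterfactual data
# augmentation (CDA). The word-pair list and function name are illustrative
# assumptions; a real implementation would use a curated lexicon and handle
# ambiguous forms (e.g. "her" -> "him"/"his") with more care.

import re

GENDER_PAIRS = [
    ("he", "she"), ("him", "her"), ("his", "hers"),
    ("man", "woman"), ("men", "women"),
    ("boy", "girl"), ("father", "mother"),
]

# Build a bidirectional swap table from the pairs.
SWAP = {}
for a, b in GENDER_PAIRS:
    SWAP[a] = b
    SWAP[b] = a

PATTERN = re.compile(r"\b(" + "|".join(SWAP) + r")\b", re.IGNORECASE)

def gender_swap(text: str) -> str:
    """Return a counterfactual copy of `text` with gendered terms swapped."""
    def replace(match):
        word = match.group(0)
        swapped = SWAP.get(word.lower(), word)
        # Preserve initial capitalization of the original token.
        return swapped.capitalize() if word[0].isupper() else swapped
    return PATTERN.sub(replace, text)

if __name__ == "__main__":
    print(gender_swap("He said the man was angry."))
    # -> "She said the woman was angry."
```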