@inproceedings{zhang-etal-2023-mitigating,
title = "Mitigating Biases in Hate Speech Detection from A Causal Perspective",
author = "Zhang, Zhehao and
Chen, Jiaao and
Yang, Diyi",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2023.findings-emnlp.440/",
doi = "10.18653/v1/2023.findings-emnlp.440",
pages = "6610--6625",
abstract = "Nowadays, many hate speech detectors are built to automatically detect hateful content. However, their training sets are sometimes skewed towards certain stereotypes (e.g., race or religion-related). As a result, the detectors are prone to depend on some shortcuts for predictions. Previous works mainly focus on token-level analysis and heavily rely on human experts' annotations to identify spurious correlations, which is not only costly but also incapable of discovering higher-level artifacts. In this work, we use grammar induction to find grammar patterns for hate speech and analyze this phenomenon from a causal perspective. Concretely, we categorize and verify different biases based on their spuriousness and influence on the model prediction. Then, we propose two mitigation approaches including Multi-Task Intervention and Data-Specific Intervention based on these confounders. Experiments conducted on 9 hate speech datasets demonstrate the effectiveness of our approaches."
}