@comment{Findings of ACL 2025 paper introducing MetaTox. The url field uses the canonical ACL Anthology address, whose path is the Anthology ID that also forms the DOI suffix, instead of a preview or ingestion-branch host.}
@inproceedings{zhao-etal-2025-enhancing-llm,
  title     = {Enhancing {LLM}-based Hatred and Toxicity Detection with Meta-Toxic Knowledge Graph},
  author    = {Zhao, Yibo and
               Zhu, Jiapeng and
               Xu, Can and
               Liu, Yao and
               Li, Xiang},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2025},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-acl.1270/},
  doi       = {10.18653/v1/2025.findings-acl.1270},
  pages     = {24747--24760},
  isbn      = {979-8-89176-256-5},
  abstract  = {The rapid growth of social media platforms has raised significant concerns regarding online content toxicity. When Large Language Models (LLMs) are used for toxicity detection, two key challenges emerge: 1) the absence of domain-specific toxicity knowledge leads to false negatives; 2) the excessive sensitivity of LLMs to toxic speech results in false positives, limiting freedom of speech. To address these issues, we propose a novel method called \textit{MetaTox}, leveraging graph search on a meta-toxic knowledge graph to enhance hatred and toxicity detection. First, we construct a comprehensive meta-toxic knowledge graph by utilizing LLMs to extract toxic information through a three step pipeline. Second, we query the graph via retrieval and ranking processes to supplement accurate, relevant toxicity knowledge. Extensive experiments and case studies across multiple datasets demonstrate that our MetaTox boosts overall toxicity detection performance, particularly in out-of-domain settings. In addition, under in-domain scenarios, we surprisingly find that small language models are more competent. Our code is available at https://github.com/YiboZhao624/MetaTox.},
}
Markdown (Informal)
[Enhancing LLM-based Hatred and Toxicity Detection with Meta-Toxic Knowledge Graph](https://aclanthology.org/2025.findings-acl.1270/) (Zhao et al., Findings 2025)
ACL