% Conference paper "SSA: Semantic Contamination of LLM-Driven Fake News
% Detection" (Xu et al., EMNLP 2025); ACL Anthology ID 2025.emnlp-main.744.
% The url field uses the canonical Anthology address instead of the temporary
% preview/ingest branch link. The abstract is kept verbatim from the export.
@inproceedings{xu-etal-2025-ssa,
  title     = {{SSA}: Semantic Contamination of {LLM}-Driven Fake News Detection},
  author    = {Xu, Cheng and
               Yan, Nan and
               Guan, Shuhao and
               Mei, Yuke and
               Kechadi, Tahar},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.emnlp-main.744/},
  pages     = {14748--14762},
  isbn      = {979-8-89176-332-6},
  abstract  = {Benchmark data contamination (BDC) silently inflate the evaluation performance of large language models (LLMs), yet current work on BDC has centered on direct token overlap (data/label level), leaving the subtler and equally harmful semantic level BDC largely unexplored. This gap is critical in fake news detection task, where prior exposure to semantic BDC lets a model ``remember'' the answer instead of reasoning. In this work, (1) we are the first to formally define semantic contamination for this task and (2) introduce the Semantic Sensitivity Amplifier (SSA), a lightweight, model-agnostic framework that detects BDC risks across semantic to label level via an entity shift perturbation and a comprehensive interpretable metric, the SSA Factor. Evaluating 45 variants of nine LLMs (0.5B{--}72B parameters) across four BDC levels, we find LIAR2 accuracy climbs monotonically with injected contamination, while the SSA Factor escalates in near-perfect lock-step ($r\geq$.97, for models $\geq$3B, $p<$.05; $\rho \geq$.9 overall, $p<$.05). These results show that SSA provides a sensitive and scalable audit of comprehensive BDC risk and paves the way for a more integrity evaluation of the LLM-driven fake news detection task.},
}
Markdown (Informal)
[SSA: Semantic Contamination of LLM-Driven Fake News Detection](https://aclanthology.org/2025.emnlp-main.744/) (Xu et al., EMNLP 2025)
ACL