% Workshop paper in the Ninth FEVER Workshop proceedings (see booktitle).
% Field values are kept exactly as exported; only delimiters and layout changed.
% NOTE(review): url points at the preview.aclanthology.org host ("credits" path);
% presumably it should be swapped for the canonical Anthology URL once the
% volume is live -- confirm before citing.
@inproceedings{gautam-etal-2026-energy,
    title     = {The Energy of Falsehood: Detecting Hallucinations via Diffusion Model Likelihoods},
    author    = {Gautam, Arpit Singh and
                 Talreja, Kailash and
                 Jha, Saurabh},
    editor    = {Akhtar, Mubashara and
                 Aly, Rami and
                 Cao, Rui and
                 Christodoulopoulos, Christos and
                 Cocarascu, Oana and
                 Guo, Zhijiang and
                 Mittal, Arpit and
                 Schlichtkrull, Michael and
                 Thorne, James and
                 Vlachos, Andreas},
    booktitle = {Proceedings of the Ninth Fact Extraction and {VER}ification Workshop ({FEVER})},
    month     = mar,
    year      = {2026},
    address   = {Rabat, Morocco},
    publisher = {Association for Computational Linguistics},
    url       = {https://preview.aclanthology.org/credits/2026.fever-1.4/},
    pages     = {47--58},
    isbn      = {979-8-89176-365-4},
    abstract  = {Large Language Models (LLMs) frequently ``hallucinate'' plausible but incorrect assertions, a vulnerability often missed by uncertainty metrics when models are ``confidently wrong.'' We propose DiffuTruth, an unsupervised framework that re-conceptualizes fact verification via non-equilibrium thermodynamics, positing that factual truths act as stable attractors on a generative manifold while hallucinations are unstable. We introduce the ``Generative Stress Test'': claims are corrupted with noise and reconstructed using a discrete text diffusion model. We define Semantic Energy, a metric measuring the semantic divergence between the original claim and its reconstruction using an NLI critic. Unlike vector-space errors, Semantic Energy isolates deep factual contradictions. We further propose a Hybrid Calibration fusing this stability signal with discriminative confidence. Extensive experiments on FEVER demonstrate DiffuTruth achieves a state-of-the-art unsupervised AUROC of 0.725, outperforming baselines by +1.5{\%} through the correction of overconfident predictions. Furthermore, we show superior zero-shot generalization on the multi-hop HOVER dataset, outperforming baselines by over 4{\%}, confirming the robustness of thermodynamic truth properties to distribution shifts.},
}
Markdown (Informal)
[The Energy of Falsehood: Detecting Hallucinations via Diffusion Model Likelihoods](https://preview.aclanthology.org/credits/2026.fever-1.4/) (Gautam et al., FEVER 2026)
ACL