@inproceedings{cha-lee-2024-pre,
    title     = {Pre-trained Language Models Return Distinguishable Probability Distributions to Unfaithfully Hallucinated Texts},
    author    = {Cha, Taehun and
                 Lee, Donghun},
    editor    = {Al-Onaizan, Yaser and
                 Bansal, Mohit and
                 Chen, Yun-Nung},
    booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2024},
    month     = nov,
    year      = {2024},
    address   = {Miami, Florida, USA},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2024.findings-emnlp.738/},
    doi       = {10.18653/v1/2024.findings-emnlp.738},
    pages     = {12630--12639},
    abstract  = {In this work, we show the pre-trained language models return distinguishable generation probability and uncertainty distribution to unfaithfully hallucinated texts, regardless of their size and structure. By examining 24 models on 6 data sets, we find out that 88--98\% of cases return statistically significantly distinguishable generation probability and uncertainty distributions. Using this general phenomenon, we showcase a hallucination-reducing training algorithm. Our algorithm outperforms other baselines by achieving higher faithfulness metrics while maintaining sound general text quality measures.}
}
Markdown (Informal)
[Pre-trained Language Models Return Distinguishable Probability Distributions to Unfaithfully Hallucinated Texts](https://aclanthology.org/2024.findings-emnlp.738/) (Cha & Lee, Findings 2024)
ACL