% ACL Anthology record 2025.findings-acl.752. The url field holds the permanent
% Anthology address rather than a temporary ingestion-preview branch.
@inproceedings{chang-etal-2025-monitoring,
  title     = {Monitoring Decoding: Mitigating Hallucination via Evaluating the Factuality of Partial Response during Generation},
  author    = {Chang, Yurui and
               Cao, Bochuan and
               Lin, Lu},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Findings of the Association for Computational Linguistics: {ACL} 2025},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-acl.752/},
  doi       = {10.18653/v1/2025.findings-acl.752},
  pages     = {14574--14587},
  isbn      = {979-8-89176-256-5},
  abstract  = {While large language models have demonstrated exceptional performance across a wide range of tasks, they remain susceptible to hallucinations {--} generating plausible yet factually incorrect contents. Existing methods to mitigating such risk often rely on sampling multiple full-length generations, which introduces significant response latency and becomes ineffective when the model consistently produces hallucinated outputs with high confidence. To address these limitations, we introduce Monitoring Decoding (MD), a novel framework that dynamically monitors the generation process and selectively applies in-process interventions, focusing on revising crucial tokens responsible for hallucinations. Instead of waiting until completion of multiple full-length generations, we identify hallucination-prone tokens during generation using a monitor function, and further refine these tokens through a tree-based decoding strategy. This approach ensures an enhanced factual accuracy and coherence in the generated output while maintaining efficiency. Experimental results demonstrate that MD consistently outperforms self-consistency-based approaches in both effectiveness and efficiency, achieving higher factual accuracy while significantly reducing computational overhead.},
}
Markdown (Informal)
[Monitoring Decoding: Mitigating Hallucination via Evaluating the Factuality of Partial Response during Generation](https://aclanthology.org/2025.findings-acl.752/) (Chang et al., Findings 2025)
ACL