@inproceedings{galimzianova-etal-2025-rag,
title = "From {RAG} to Reality: Coarse-Grained Hallucination Detection via {NLI} Fine-Tuning",
author = "Galimzianova, Daria and
Boriskin, Aleksandr and
Arshinov, Grigory",
editor = "Ghosal, Tirthankar and
Mayr, Philipp and
Singh, Amanpreet and
Naik, Aakanksha and
Rehm, Georg and
Freitag, Dayne and
Li, Dan and
Schimmler, Sonja and
De Waard, Anita",
booktitle = "Proceedings of the Fifth Workshop on Scholarly Document Processing (SDP 2025)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/landing_page/2025.sdp-1.34/",
doi = "10.18653/v1/2025.sdp-1.34",
pages = "353--359",
ISBN = "979-8-89176-265-7",
abstract = "We present our submission to SciHal Subtask 1: coarse-grained hallucination detection for scientific question answering. We frame hallucination detection as an NLI-style three-way classification (entailment, contradiction, unverifiable) and show that simple fine-tuning of NLI-adapted encoder models on task data outperforms more elaborate feature-based pipelines and large language model prompting. In particular, DeBERTa-V3-large, a model pretrained on five diverse NLI corpora, achieves the highest weighted F1 on the public leaderboard. We additionally explore a pipeline combining joint claim{--}reference embeddings and NLI softmax probabilities fed into a classifier, but find its performance consistently below direct encoder fine-tuning. Our findings demonstrate that, for reference-grounded hallucination detection, targeted encoder fine-tuning remains the most accurate and efficient approach."
}
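For readers who want the general recipe, below is a minimal sketch of the NLI-style three-way fine-tuning the abstract describes, using Hugging Face transformers and datasets. The checkpoint name (a DeBERTa-v3-large variant trained on several NLI corpora), the label order, the toy claim-reference pair, and all hyperparameters are illustrative assumptions, not the authors' exact configuration.

# Hedged sketch: fine-tune an NLI-adapted encoder for three-way
# hallucination detection (entailment / contradiction / unverifiable).
# Checkpoint, label mapping, data fields, and hyperparameters are
# assumptions for illustration, not the paper's exact setup.
from transformers import (AutoModelForSequenceClassification, AutoTokenizer,
                          Trainer, TrainingArguments)
from datasets import Dataset

LABELS = ["entailment", "contradiction", "unverifiable"]

# An NLI-adapted DeBERTa-v3-large checkpoint (assumed; the paper names only
# the architecture and that it was pretrained on five diverse NLI corpora).
model_name = "MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name, num_labels=len(LABELS), ignore_mismatched_sizes=True)

# Toy claim-reference pair; real training data would come from the
# SciHal Subtask 1 release.
train = Dataset.from_dict({
    "reference": ["The cited study reports a 12% gain on the benchmark."],
    "claim": ["The method improves the benchmark by 12%."],
    "label": [0],  # entailment
})

def encode(batch):
    # NLI framing: reference text as premise, generated claim as hypothesis.
    return tokenizer(batch["reference"], batch["claim"],
                     truncation=True, padding="max_length", max_length=512)

train = train.map(encode, batched=True)

trainer = Trainer(
    model=model,
    args=TrainingArguments(output_dir="out", num_train_epochs=3,
                           per_device_train_batch_size=8),
    train_dataset=train,
)
trainer.train()

At inference time, the argmax over the three softmax outputs gives the coarse-grained label; the same softmax probabilities are the kind of NLI features the abstract's alternative pipeline feeds into a downstream classifier.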