@inproceedings{schopf-etal-2025-natural,
title = "Natural Language Inference Fine-tuning for Scientific Hallucination Detection",
author = {Schopf, Tim and
Vladika, Juraj and
F{\"a}rber, Michael and
Matthes, Florian},
editor = "Ghosal, Tirthankar and
Mayr, Philipp and
Singh, Amanpreet and
Naik, Aakanksha and
Rehm, Georg and
Freitag, Dayne and
Li, Dan and
Schimmler, Sonja and
De Waard, Anita",
booktitle = "Proceedings of the Fifth Workshop on Scholarly Document Processing (SDP 2025)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/landing_page/2025.sdp-1.33/",
doi = "10.18653/v1/2025.sdp-1.33",
pages = "344--352",
ISBN = "979-8-89176-265-7",
abstract = "Modern generative Large Language Models (LLMs) are capable of generating text that sounds coherent and convincing, but are also prone to producing \textit{hallucinations}, facts that contradict world knowledge. Even in Retrieval-Augmented Generation (RAG) systems, where relevant context is first retrieved and passed in the input, the generated facts can contradict the provided references or not be verifiable from them. This has motivated SciHal 2025, a shared task that focuses on the detection of hallucinations in scientific content. The two subtasks focused on: (1) predicting whether a claim from a generated LLM answer is entailed, contradicted, or unverifiable by the used references; (2) predicting a fine-grained category of erroneous claims. Our best-performing approach used an ensemble of fine-tuned encoder-only ModernBERT and DeBERTa-v3 models for classification. Out of nine competing teams, our approach achieved first place in sub-task 1 and second place in sub-task 2."
}
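
For illustration, a minimal sketch (not the authors' released code) of the NLI-style classification setup the abstract describes: an encoder-only model such as DeBERTa-v3 classifies a (reference, claim) pair as entailed, contradicted, or unverifiable. The checkpoint name, label order, and example texts below are assumptions; the paper fine-tunes and ensembles such models on the shared-task data.

    # Sketch only: base (not fine-tuned) checkpoint, assumed label order.
    import torch
    from transformers import AutoTokenizer, AutoModelForSequenceClassification

    MODEL_NAME = "microsoft/deberta-v3-base"   # assumed base checkpoint
    LABELS = ["entailed", "contradicted", "unverifiable"]  # assumed label order

    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForSequenceClassification.from_pretrained(
        MODEL_NAME, num_labels=len(LABELS)
    )

    # The reference acts as the NLI premise, the generated claim as the hypothesis.
    reference = "The cited study reports a 12% improvement on the benchmark."
    claim = "The referenced work improves benchmark performance."

    inputs = tokenizer(reference, claim, return_tensors="pt", truncation=True)
    with torch.no_grad():
        logits = model(**inputs).logits
    print(LABELS[logits.argmax(dim=-1).item()])

Without task-specific fine-tuning the classifier head is randomly initialized, so the prediction above is arbitrary; the sketch only shows the input pairing and three-way label scheme.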