@comment{
  HalluGuard paper, Findings of ACL 2026 (ACL Anthology export).
  Case-sensitive words in title/booktitle are protected as whole words so
  sentence-casing styles keep them intact.
  NOTE(review): the url field points at a preview.aclanthology.org ingest
  build, which looks like a staging link -- confirm the canonical Anthology
  URL (and add a doi if one exists) before relying on it.
}
@inproceedings{bergeron-etal-2026-halluguard,
  author    = {Bergeron, Loris and
               Buhnila, Ioana and
               Francois, Jerome and
               State, Radu},
  title     = {{HalluGuard}: Evidence-Grounded Small Reasoning Models to Mitigate Hallucinations in Retrieval-Augmented Generation},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.835/},
  pages     = {16918--16932},
  isbn      = {979-8-89176-395-1},
  abstract  = {Large Language Models excel at NLP tasks but remain prone to hallucinations, limiting trust in real-world applications. We present HalluGuard, a 4B-parameter Small Reasoning Model (SRM) designed as a guardrail for Retrieval-Augmented Generation (RAG) pipelines, which classify document-claim pairs as grounded or hallucinated in closed-book, document-grounded settings and produces evidence-grounded justifications. Our approach combines (i) a domain-agnostic synthetic dataset derived from FineWeb and refined through multi-stage curation and data reformation, (ii) synthetic grounded and hallucinated claims, and (iii) preference-based fine-tuning with Odds Ratio Preference Optimization (ORPO) to distill large-model reasoning into a smaller backbone. On the RAGTruth subset of the LLM-AggreFact benchmark, HalluGuard achieves 84.4\% balanced accuracy (BAcc), surpassing specialized models, MiniCheck (7B; 84.0\%) and Granite Guardian 3.3 (8B; 82.2\%) while using roughly half their parameters. Across the benchmark, it reaches 77.1\% BAcc, surpassing larger general-purpose LLMs such as GPT-4o (75.9\%). HalluGuard and datasets will be released upon acceptance.},
}
Markdown (Informal)
[HalluGuard: Evidence-Grounded Small Reasoning Models to Mitigate Hallucinations in Retrieval-Augmented Generation](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.835/) (Bergeron et al., Findings 2026)
ACL