% Workshop paper in the CustomNLP4U 2026 proceedings (identifier 2026.customnlp4u-1.14 in the url).
% NOTE(review): the url field points at the Anthology preview (ingest-acl-workshops) branch;
% confirm whether it should be replaced by the canonical Anthology URL before citing.
@inproceedings{lasko-etal-2026-improving,
  title     = {Improving Medical Hallucination Detection with System Combination and Rule-based Customization},
  author    = {Lasko, Jonathan and
               Karakos, Damianos and
               Keith, Francis},
  editor    = {Mysore, Sheshera and
               Kumar, Sachin and
               Balachandran, Vidhisha and
               Hayati, Shirley Anugrah and
               Brahman, Faeze and
               Moussa, Hanane Nour and
               Salemi, Alireza},
  booktitle = {Proceedings of the Second Workshop on Customizable {NLP}: Progress and Challenges in Customizing {NLP} for a Domain, Application, Group, or Individual ({CustomNLP4U})},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.customnlp4u-1.14/},
  pages     = {160--166},
  isbn      = {979-8-89176-396-8},
  abstract  = {The presence of factuality errors (hallucinations) in the outputs of patient-facing medical chatbots is a serious problem: they can lead to patient harm and erode people{'}s trust in the medical profession. For this reason, it is crucial to detect hallucinations in chatbot outputs and forward them to clinicians for review. In this paper, we present the system we built for detecting such errors: it consists of multiple LLM-powered detectors which are combined together with a novel alignment procedure. We ran our system on the MedExpert-Benchmark dataset (Yarmohammadi et al., 2025) and our results on two use cases, Mental Health and Prenatal Care, show that the combined system gives nice gains over the individual systems. Additionally, we show that further customization of the system to each one of the use cases leads to further gains, but at the cost of reduced generalizability. Our code and dataset are available here: https://github.com/BBN-E/medic-customnlp4u.},
}
Markdown (Informal)
[Improving Medical Hallucination Detection with System Combination and Rule-based Customization](https://preview.aclanthology.org/ingest-acl-workshops/2026.customnlp4u-1.14/) (Lasko et al., CustomNLP4U 2026)
ACL