@comment{
  Conference paper by Han, Lan and Kilicoglu in the BioNLP 2026 proceedings,
  pages 630--643, published by the Association for Computational Linguistics.
  NOTE(review): the url field points at the preview.aclanthology.org ingest
  host; presumably a staging link -- confirm the canonical URL before release.
  NOTE(review): address is kept as given in the source record; it may be the
  event location and not the publisher's city -- confirm.
}
@inproceedings{han-etal-2026-evidence,
  author    = {Han, Yikun and
               Lan, Mengfei and
               Kilicoglu, Halil},
  title     = {When Evidence Conflicts: Uncertainty and Order Effects in Retrieval-Augmented Biomedical Question Answering},
  editor    = {Demner-Fushman, Dina and
               Ananiadou, Sophia and
               Roberts, Kirk and
               Tsujii, Junichi},
  booktitle = {{B}io{NLP} 2026},
  year      = {2026},
  month     = jul,
  address   = {San Diego, California},
  publisher = {Association for Computational Linguistics},
  pages     = {630--643},
  isbn      = {979-8-89176-434-7},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.bionlp-1.50/},
  abstract  = {Biomedical retrieval-augmented LLMs are often evaluated under helpful retrieved context, but in practice the evidence can also be misleading or internally conflicting. This paper studies uncertainty under those harder settings using the HealthContradict benchmark and six open-weight models. We evaluate five controlled evidence conditions: no context, correct-only context, incorrect-only context, and two mixed conditions that contain the same correct and contradictory documents in opposite orders. Correct evidence improves both accuracy and calibration, while incorrect evidence substantially degrades both. Under conflicting evidence, document order also matters: reversing the order of the same two documents changes 11.4{\%}{--}25.2{\%} of predictions and consistently reduces performance when the incorrect document appears first. We further evaluate a conflict-aware abstention score that combines model confidence with a detector of evidence conflict. In the two hardest conditions, incorrect-only and incorrect-first conflict, this score improves selective accuracy over confidence-only abstention, with mean gains of 7.2{--}33.4 and 3.6{--}14.4 points across 75{\%}, 50{\%}, and 25{\%} coverage. These results show that biomedical RAG systems should be evaluated not only under helpful retrieval, but also under misleading and conflicting evidence.},
}
Markdown (Informal)
[When Evidence Conflicts: Uncertainty and Order Effects in Retrieval-Augmented Biomedical Question Answering](https://preview.aclanthology.org/ingest-acl-workshops/2026.bionlp-1.50/) (Han et al., BioNLP 2026)
ACL