% Workshop paper from the BioNLP 2026 proceedings (ACL Anthology export).
% Abstract repaired: em dashes that had been mis-encoded as "?" are restored
% as "---", and hyphens lost at line breaks are re-inserted.
% NOTE(review): the url field points at the Anthology preview host; presumably
% it should be replaced by the canonical link once the volume is published --
% confirm before release.
@inproceedings{elmofty-leser-2026-retrieval,
    title     = {When Does Retrieval Beat Direct {LLM} Diagnosis in Rare Disease? {An} Empirical Study of Ontology Coverage},
    author    = {Elmofty, Mohamed and
                 Leser, Ulf},
    editor    = {Demner-Fushman, Dina and
                 Ananiadou, Sophia and
                 Roberts, Kirk and
                 Tsujii, Junichi},
    booktitle = {{BioNLP} 2026},
    month     = jul,
    year      = {2026},
    address   = {San Diego, California},
    publisher = {Association for Computational Linguistics},
    url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.bionlp-1.41/},
    pages     = {508--518},
    isbn      = {979-8-89176-434-7},
    abstract  = {Recent high-complexity agentic systems such as DeepRare perform strongly on rare disease diagnosis benchmarks, but it remains unclear when gains come from structured knowledge access and when they come from parametric LLM knowledge. We compare phenotype-based retrieval, LLM reranking, and unrestricted LLM diagnosis across seven benchmarks covering 10,382 cases. We find a clear performance crossover driven by retrieval coverage---the fraction of cases whose true diagnosis is within the retriever{'}s top-50: on high-coverage datasets, ontology-based retrieval dominates; on low-coverage datasets, open-ended LLM diagnosis takes the lead. Building on this, adding an LLM reranker over retrieved candidates further improves accuracy across our patient-case benchmarks, closing most of the remaining gap to agentic systems (within 2 pp on MME and LIRICAL). We trace the crossover to two structural failure modes of ontology-based retrieval---annotation sparsity and phenotypic homogeneity---and show that aggregate scores across mixed benchmarks can hide these qualitatively different diagnostic settings. These findings motivate per-dataset evaluation and hybrid diagnostic systems that combine retrieval, reranking, and parametric LLM generation based on case characteristics.},
}
Markdown (Informal)
[When Does Retrieval Beat Direct LLM Diagnosis in Rare Disease? An Empirical Study of Ontology Coverage](https://preview.aclanthology.org/ingest-acl-workshops/2026.bionlp-1.41/) (Elmofty & Leser, BioNLP 2026)
ACL