% CRAB benchmark paper, EMNLP 2025 Industry Track proceedings.
@inproceedings{zhong-etal-2025-crab,
  title     = {{CRAB}: A Benchmark for Evaluating Curation of Retrieval-Augmented {LLMs} in Biomedicine},
  author    = {Zhong, Hanmeng and
               Chen, Linqing and
               Wu, Wentao and
               Wang, Weilei},
  editor    = {Potdar, Saloni and
               Rojas-Barahona, Lina and
               Montella, Sebastien},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track},
  month     = nov,
  year      = {2025},
  address   = {Suzhou (China)},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-emnlp/2025.emnlp-industry.3/},
  pages     = {34--49},
  isbn      = {979-8-89176-333-3},
  abstract  = {Recent development in Retrieval-Augmented Large Language Models (LLMs) have shown great promise in biomedical applications. However, a critical gap persists in reliably evaluating their curation ability{---}the process by which models select and integrate relevant references while filtering out noise. To address this, we introduce the benchmark for Curation of Retrieval-Augmented LLMs in Biomedicine (CRAB), the first multilingual benchmark tailored for evaluating the biomedical curation of retrieval-augmented LLMs, available in English, French, German and Chinese. By incorporating a novel citation-based evaluation metric, CRAB quantifies the curation performance of retrieval-augmented LLMs in biomedicine. Experimental results reveal significant discrepancies in the curation performance of mainstream LLMs, underscoring the urgent need to improve it in the domain of biomedicine.},
}

Markdown (Informal)
[CRAB: A Benchmark for Evaluating Curation of Retrieval-Augmented LLMs in Biomedicine](https://preview.aclanthology.org/ingest-emnlp/2025.emnlp-industry.3/) (Zhong et al., EMNLP 2025)
ACL