@inproceedings{yu-etal-2025-scrag,
title = "sc{RAG}: Hybrid Retrieval-Augmented Generation for {LLM}-based Cross-Tissue Single-Cell Annotation",
author = "Yu, Zhiyin and
Zheng, Chao and
Chen, Chong and
Hua, Xian-Sheng and
Luo, Xiao",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/landing_page/2025.findings-acl.53/",
pages = "954--970",
ISBN = "979-8-89176-256-5",
abstract = "In recent years, large language models (LLMs) such as GPT-4 have demonstrated impressive potential in a wide range of fields, including biology, genomics and healthcare. Numerous studies have attempted to apply pre-trained LLMs to single-cell data analysis within one tissue. However, when it comes to cross-tissue cell annotation, LLMs often suffer from unsatisfactory performance due to the lack of specialized biological knowledge regarding genes and tissues. In this paper, we introduce scRAG, a novel framework that incorporates advanced LLM-based RAG techniques into cross-tissue single-cell annotation. scRAG utilizes LLMs to retrieve structured triples from knowledge graphs and unstructured similar cell information from the reference cell database, and it generates candidate cell types. The framework further optimizes predictions by retrieving marker genes from both candidate cells and similar cells to refine its results. Extensive experiments on a cross-tissue dataset demonstrate that our scRAG framework outperforms various baselines, including generalist models, domain-specific methods, and trained classifiers. The source code is available at https://github.com/YuZhiyin/scRAG."
}
Markdown (Informal)
[scRAG: Hybrid Retrieval-Augmented Generation for LLM-based Cross-Tissue Single-Cell Annotation](https://preview.aclanthology.org/landing_page/2025.findings-acl.53/) (Yu et al., Findings 2025)
ACL