@inproceedings{sharma-etal-2025-og,
title = "{OG}-{RAG}: Ontology-grounded retrieval-augmented generation for large language models",
author = "Sharma, Kartik and
Kumar, Peeyush and
Li, Yunqing",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.emnlp-main.1674/",
doi = "10.18653/v1/2025.emnlp-main.1674",
pages = "32950--32969",
isbn = "979-8-89176-332-6",
abstract = "While LLMs are widely used for generic tasks like question answering and search, they struggle to adapt to specialized knowledge, such as industrial workflows in healthcare, legal, and agricultural sectors, as well as knowledge-driven tasks such as news journalism, investigative research, and consulting without expensive fine-tuning or sub-optimal retrieval methods. Existing retrieval-augmented models, such as RAG, offer improvements but fail to account for structured domain knowledge, leading to suboptimal context generation. Ontologies, which conceptually organize domain knowledge by defining entities and their interrelationships, offer a structured representation to address this gap. This paper presents OG-RAG, an Ontology-Grounded Retrieval Augmented Generation method designed to enhance LLM-generated responses by anchoring retrieval processes in domain-specific ontologies. OG-RAG constructs a hypergraph representation of domain documents, where each hyperedge encapsulates clusters of factual knowledge grounded using domain-specific ontology and retrieves a minimal set of hyperedges for a given query using an optimization algorithm. Our evaluations demonstrate that OG-RAG increases the recall of accurate facts by 55{\%} and improves response correctness by 40{\%} across four different LLMs. Additionally, OG-RAG enables 30{\%} faster attribution of responses to context and boosts fact-based reasoning accuracy by 27{\%} compared to baseline methods. We release the code at [https://github.com/microsoft/ograg2](https://github.com/microsoft/ograg2)."
}
Markdown (Informal)
[OG-RAG: Ontology-grounded retrieval-augmented generation for large language models](https://aclanthology.org/2025.emnlp-main.1674/) (Sharma et al., EMNLP 2025)
ACL