@comment{
  QuackIR paper, EMNLP 2025 Industry Track (ACL Anthology export).
  NOTE(review): the url field points at a preview/ingest host
  (preview.aclanthology.org/ingest-emnlp); confirm the canonical
  Anthology URL or DOI once the volume is published and update it.
}
@inproceedings{ge-etal-2025-quackir,
  title     = {{QuackIR}: Retrieval in {DuckDB} and Other Relational Database Management Systems},
  author    = {Ge, Yijun and
               Chen, Zijian and
               Lin, Jimmy},
  editor    = {Potdar, Saloni and
               Rojas-Barahona, Lina and
               Montella, Sebastien},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track},
  month     = nov,
  year      = {2025},
  address   = {Suzhou (China)},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-emnlp/2025.emnlp-industry.33/},
  pages     = {492--500},
  isbn      = {979-8-89176-333-3},
  abstract  = {Enterprises today are increasingly compelled to adopt dedicated vector databases for retrieval-augmented generation (RAG) in applications based on large language models (LLMs). As a potential alternative for these vector databases, we propose that organizations leverage existing relational databases for retrieval, which many have already deployed in their enterprise data lakes, thus minimizing additional complexity in their software stacks. To demonstrate the simplicity and feasibility of this approach, we present QuackIR, an information retrieval (IR) toolkit built on relational database management systems (RDBMSes), with integrations in DuckDB, SQLite, and PostgreSQL. Using QuackIR, we benchmark the sparse and dense retrieval capabilities of these popular RDBMSes and demonstrate that their effectiveness is comparable to baselines from established IR toolkits. Our results highlight the potential of relational databases as a simple option for RAG scenarios due to their established widespread usage and the easy integration of retrieval abilities. Our implementation is available at quackir.io.},
}
Markdown (Informal)
[QuackIR: Retrieval in DuckDB and Other Relational Database Management Systems](https://preview.aclanthology.org/ingest-emnlp/2025.emnlp-industry.33/) (Ge et al., EMNLP 2025)
ACL