% EMNLP 2025 main-conference paper (record exported from the ACL Anthology).
% NOTE(review): `url` targets the Anthology preview/ingest host
% (preview.aclanthology.org/ingest-emnlp); confirm whether it should be
% switched to the canonical aclanthology.org address.
% NOTE(review): `address` holds the conference location, following the
% Anthology export convention, not the publisher's city.
@inproceedings{laitenberger-etal-2025-stronger,
  title     = {Stronger Baselines for Retrieval-Augmented Generation with Long-Context Language Models},
  author    = {Laitenberger, Alex and
               Manning, Christopher D. and
               Liu, Nelson F.},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-emnlp/2025.emnlp-main.1656/},
  pages     = {32547--32557},
  isbn      = {979-8-89176-332-6},
  abstract  = {With the rise of long-context language models (LMs) capable of processing tens of thousands of tokens in a single context window, do multi-stage retrieval-augmented generation (RAG) pipelines still offer measurable benefits over simpler, single-stage approaches? To assess this question, we conduct a controlled evaluation for QA tasks under systematically scaled token budgets, comparing two recent multi-stage pipelines, ReadAgent and RAPTOR, against three baselines, including DOS RAG (Document{'}s Original Structure RAG), a simple retrieve-then-read method that preserves original passage order. Despite its straightforward design, DOS RAG consistently matches or outperforms more intricate methods on multiple long-context QA benchmarks. We trace this strength to a combination of maintaining source fidelity and document structure, prioritizing recall within effective context windows, and favoring simplicity over added pipeline complexity. We recommend establishing DOS RAG as a simple yet strong baseline for future RAG evaluations, paired with state-of-the-art embedding and language models, and benchmarked under matched token budgets, to ensure that added pipeline complexity is justified by clear performance gains as models continue to improve.},
}
Markdown (Informal)
[Stronger Baselines for Retrieval-Augmented Generation with Long-Context Language Models](https://preview.aclanthology.org/ingest-emnlp/2025.emnlp-main.1656/) (Laitenberger et al., EMNLP 2025)
ACL