@inproceedings{chawla-etal-2025-fb,
title = "{FB}-{RAG}: Improving {RAG} with Forward and Backward Lookup",
author = "Chawla, Kushal and
Samuel, Alfy and
Kumar, Anoop and
Liu, Daben",
editor = "Inui, Kentaro and
Sakti, Sakriani and
Wang, Haofen and
Wong, Derek F. and
Bhattacharyya, Pushpak and
Banerjee, Biplab and
Ekbal, Asif and
Chakraborty, Tanmoy and
Singh, Dhirendra Pratap",
booktitle = "Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics",
month = dec,
year = "2025",
address = "Mumbai, India",
publisher = "The Asian Federation of Natural Language Processing and The Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingest-ijcnlp-aacl/2025.findings-ijcnlp.62/",
pages = "1055--1071",
ISBN = "979-8-89176-303-6",
abstract = "Traditional Retrieval-Augmented Generation (RAG) struggles with complex queries that lack strong signals to retrieve the most relevant context, forcing a trade-off between choosing a small context that misses key information and a large context that confuses the LLM. To address this, we propose Forward-Backward RAG (FB-RAG), a new training-free framework based on a simple yet powerful forward-looking strategy. FB-RAG employs a light-weight LLM to peek into potential future generations, using evidence from multiple sampled outputs to precisely identify the most relevant context for a final, more powerful generator. This improves performance without complex finetuning or Reinforcement Learning common in prior work. Across 9 datasets from LongBench and $\infty$Bench, FB-RAG consistently delivers strong results. Further, the performance gains can be achieved with reduced latency due to a shorter, more focused prompt for the powerful generator. On EN.QA dataset, FB-RAG matches the leading baseline with over 48{\%} latency reduction or achieves an 8{\%} performance improvement with a 10{\%} latency reduction. Our analysis finds cases where even when the forward-looking LLM fails to generate correct answers, its attempts are sufficient to guide the final model to an accurate response, demonstrating how smaller LLMs can systematically improve the performance and efficiency of larger ones."
}

Markdown (Informal)
[FB-RAG: Improving RAG with Forward and Backward Lookup](https://preview.aclanthology.org/ingest-ijcnlp-aacl/2025.findings-ijcnlp.62/) (Chawla et al., Findings 2025)

ACL
Kushal Chawla, Alfy Samuel, Anoop Kumar, and Daben Liu. 2025. FB-RAG: Improving RAG with Forward and Backward Lookup. In Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics, pages 1055–1071, Mumbai, India. The Asian Federation of Natural Language Processing and The Association for Computational Linguistics.
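
As a reading aid for the abstract above, here is a minimal, hypothetical Python sketch of the forward-and-backward lookup idea it describes: a lightweight model drafts candidate answers over the full retrieved context, each chunk is re-scored against both the query (backward) and those drafts (forward), and only the top-scoring chunks are passed to the stronger generator. Every name here (`light_llm`, `strong_llm`, `fb_rerank`) and the token-overlap scorer are illustrative assumptions, not the authors' implementation.

```python
# Illustrative sketch of forward-and-backward lookup re-ranking (not the paper's code).
# Assumption: `light_llm` and `strong_llm` are any callables mapping a prompt string
# to generated text; token overlap stands in for the paper's actual relevance scoring.

from collections import Counter
from typing import Callable, List


def _overlap(text: str, chunk: str) -> float:
    """Fraction of tokens in `text` that also appear in `chunk` (toy relevance signal)."""
    text_tokens = Counter(text.lower().split())
    chunk_tokens = set(chunk.lower().split())
    hits = sum(c for tok, c in text_tokens.items() if tok in chunk_tokens)
    total = sum(text_tokens.values())
    return hits / total if total else 0.0


def fb_rerank(
    query: str,
    chunks: List[str],
    light_llm: Callable[[str], str],
    strong_llm: Callable[[str], str],
    num_samples: int = 3,
    top_k: int = 4,
) -> str:
    # Forward lookup: sample draft answers from the lightweight model over the full context.
    context = "\n\n".join(chunks)
    drafts = [
        light_llm(f"Context:\n{context}\n\nQuestion: {query}\nAnswer:")
        for _ in range(num_samples)
    ]

    # Score each chunk against the query (backward) and the sampled drafts (forward).
    scores = [
        _overlap(query, chunk) + sum(_overlap(d, chunk) for d in drafts)
        for chunk in chunks
    ]

    # Keep only the highest-scoring chunks for a shorter, more focused final prompt.
    ranked = sorted(zip(scores, chunks), key=lambda pair: pair[0], reverse=True)
    focused = "\n\n".join(chunk for _, chunk in ranked[:top_k])

    return strong_llm(f"Context:\n{focused}\n\nQuestion: {query}\nAnswer:")
```

Even if the drafts from `light_llm` are wrong, their wording can still overlap with the relevant passages, which is the intuition the abstract points to when noting that failed forward generations can still guide the final model.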