@inproceedings{guo-etal-2025-enhancing,
  title     = {Enhancing {RAG} Efficiency with Adaptive Context Compression},
  author    = {Guo, Shuyu and
               Zhang, Shuo and
               Ren, Zhaochun},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2025},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-emnlp.1307/},
  doi       = {10.18653/v1/2025.findings-emnlp.1307},
  pages     = {24061--24076},
  isbn      = {979-8-89176-335-7},
  abstract  = {Retrieval-augmented generation (RAG) enhances large language models (LLMs) with external knowledge but incurs significant inference costs due to lengthy retrieved contexts. While context compression mitigates this issue, existing methods apply fixed compression rates{---}over-compressing simple queries or under-compressing complex ones. We propose Adaptive Context Compression for RAG (ACC-RAG), a framework that dynamically adjusts compression rates based on input complexity, optimizing inference efficiency without loss of accuracy. ACC-RAG combines a hierarchical compressor (for multi-granular embeddings) with a context selector to retain minimal sufficient information, akin to human skimming. Evaluated on Wikipedia and five QA datasets, ACC-RAG outperforms fixed-rate methods and unlocks {\ensuremath{>}}4{\texttimes} faster inference versus standard RAG while maintaining or improving accuracy.},
}

@comment{Pasted ACL Anthology citation snippet, kept for reference:
Markdown (Informal)
[Enhancing RAG Efficiency with Adaptive Context Compression](https://aclanthology.org/2025.findings-emnlp.1307/) (Guo et al., Findings 2025)
ACL
}