% Workshop paper (CustomNLP4U 2026) by Li, Peng, and Chen on whether prompt
% compression in RAG preserves citation grounding.
% NOTE(review): the `url` field points at a preview/ingest host; confirm the
% canonical address before relying on it.
@inproceedings{li-etal-2026-efficiency,
  title     = {Efficiency vs. Verifiability in Evidence-Aware {RAG}: Does Prompt Compression Preserve Citation Grounding?},
  author    = {Li, Aiyu and
               Peng, Qian and
               Chen, Bin},
  editor    = {Mysore, Sheshera and
               Kumar, Sachin and
               Balachandran, Vidhisha and
               Hayati, Shirley Anugrah and
               Brahman, Faeze and
               Moussa, Hanane Nour and
               Salemi, Alireza},
  booktitle = {Proceedings of the Second Workshop on Customizable {NLP}: Progress and Challenges in Customizing {NLP} for a Domain, Application, Group, or Individual ({C}ustom{NLP}4{U})},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.customnlp4u-1.19/},
  pages     = {202--215},
  isbn      = {979-8-89176-396-8},
  abstract  = {Retrieval-augmented generation (RAG) is widely used in domain-specific and knowledge-intensive applications, where long prompts increase inference cost and may exceed context limits. Prompt compression is therefore appealing, but existing evaluations focus primarily on answer quality, overlooking whether compressed systems remain faithful to the retrieved evidence. In this paper, we ask: does compression that preserves answers also preserve grounding? Using Self-RAG and LLMLingua-2 in a controlled setting, we evaluate compressed RAG on ASQA in terms of both answer correctness and citation grounding. Under increasing compression, answer correctness drops by only 2-4{\%}, whereas grounding drops by 40-50{\%}. This stark divergence shows that answer-only evaluation can substantially overestimate the reliability of compressed RAG in evidence-aware scenarios. We further propose a lightweight hierarchical compression strategy that prioritizes evidence-bearing spans. It recovers nearly all grounding loss while maintaining comparable answer quality. Our results reveal a clear trade-off between efficiency and verifiability, and suggest that compression in RAG should be customized to downstream verification needs rather than treated as a one-size-fits-all efficiency intervention.},
}
Markdown (Informal)
[Efficiency vs. Verifiability in Evidence-Aware RAG: Does Prompt Compression Preserve Citation Grounding?](https://preview.aclanthology.org/ingest-acl-workshops/2026.customnlp4u-1.19/) (Li et al., CustomNLP4U 2026)
ACL