@inproceedings{louis-etal-2025-pisco,
  title     = {{PISCO}: Pretty Simple Compression for Retrieval-Augmented Generation},
  author    = {Louis, Maxime and
               D{\'e}jean, Herv{\'e} and
               Clinchant, St{\'e}phane},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2025},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-acl.800/},
  pages     = {15506--15521},
  isbn      = {979-8-89176-256-5},
  abstract  = {Retrieval-Augmented Generation (RAG) pipelines enhance Large Language Models (LLMs) by retrieving relevant documents, but they face scalability issues due to high inference costs and limited context size. Document compression is a practical solution, but current soft compression methods often suffer from accuracy losses and require extensive pretraining. In this paper, we introduce PISCO, a novel method that achieves a 16x compression rate with minimal accuracy loss (0-3{\%}) across diverse RAG-based question-answering (QA) tasks. Unlike existing approaches, PISCO requires no pretraining or annotated data, relying solely on sequence-level knowledge distillation from document-based questions. With the ability to fine-tune a 7-10B LLM in 24 hours on a single A100 GPU, PISCO offers a highly efficient and scalable solution. We present comprehensive experiments showing that PISCO outperforms existing compression models by 8{\%} in accuracy.},
}
@comment{
  Markdown (informal) citation:
  [PISCO: Pretty Simple Compression for Retrieval-Augmented Generation](https://aclanthology.org/2025.findings-acl.800/)
  (Louis et al., Findings of ACL 2025)
}