@inproceedings{li-etal-2025-500xcompressor,
title = "500x{C}ompressor: Generalized Prompt Compression for Large Language Models",
author = "Li, Zongqian and
Su, Yixuan and
Collier, Nigel",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingestion-acl-25/2025.acl-long.1219/",
pages = "25081--25091",
ISBN = "979-8-89176-251-0",
abstract = "Prompt compression is important for large language models (LLMs) to increase inference speed, reduce costs, and improve user experience. However, current methods face challenges such as low compression ratios and potential training-test overlap during evaluation. To address these issues, we propose 500xCompressor, a method that compresses natural language contexts into a minimum of one special token and demonstrates strong generalization ability. The 500xCompressor introduces approximately 0.3{\%} additional parameters and achieves compression ratios ranging from 6x to 500x, achieving 27-90{\%} reduction in calculations and 55-83{\%} memory savings when generating 100-400 tokens for new and reused prompts at 500x compression, while retaining 70-74{\%} (F1) and 77-84{\%} (Exact Match) of the LLM capabilities compared to using non-compressed prompts. It is designed to compress any text, answer various types of questions, and can be utilized by the original LLM without requiring fine-tuning. Initially, 500xCompressor was pretrained on the ArxivCorpus, followed by fine-tuning on the ArxivQA dataset, and subsequently evaluated on strictly unseen and cross-domain question answering (QA) datasets. This study shows that KV values outperform embeddings in preserving information at high compression ratios. The highly compressive nature of natural language prompts, even for detailed information, suggests potential for future applications and the development of a new LLM language."
}