@comment{NOTE(review): (1) the third author is stored as "Junsuo, Zhao"; the given and family names may be swapped -- confirm against the paper before citing. (2) the url field points at a preview.aclanthology.org address, which looks like a pre-publication link -- confirm the canonical URL.}
@inproceedings{chen-etal-2026-condenseflow,
  title     = {{CondenseFlow}: Scalable Latent Space Collaboration via Semantic Compression for Multi-Agent Systems},
  author    = {Chen, Xiaoyu and
               Wu, Fengge and
               Junsuo, Zhao and
               Fan, Yun},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.669/},
  pages     = {13694--13712},
  isbn      = {979-8-89176-395-1},
  abstract  = {Full-state latent communication in LLM-based multi-agent systems offers richer semantics than text but suffers from memory overhead scaling linearly with collaboration rounds. We propose CondenseFlow, which introduces the Latent Thought Condenser (LTC){---}a lightweight module using learnable semantic probes to compress KV caches into fixed-size representations, achieving $\mathcal{O}(1)$ communication complexity regardless of context length. We theoretically prove that compression error is bounded by attention concentration and accumulates controllably across rounds. On seven benchmarks spanning six models, CondenseFlow reduces KV cache memory by over 99{\%} and inference latency by approximately 20{\%} compared to dense transfer with negligible accuracy degradation, while outperforming text-based methods by 1.7 percentage points on average across all configurations. Code is available at https://github.com/xxy33/condenseflow.},
}
Markdown (Informal)
[CondenseFlow: Scalable Latent Space Collaboration via Semantic Compression for Multi-Agent Systems](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.669/) (Chen et al., Findings 2026)
ACL