% Conference paper "CascadeDebate" (Chang, Kwon, Lee, Verma), ACL 2026 proceedings.
% Cleaned from an ACL Anthology export: whole-word brace protection, brace
% delimiters, Markdown bold removed from the abstract, sentence spacing fixed.
% NOTE(review): url is on the preview.aclanthology.org "ingest-acl" host --
%   presumably a staging link; confirm and replace it with the canonical
%   Anthology URL (or a doi) once one is available.
% NOTE(review): address looks like the conference venue rather than the
%   publisher's city; kept as exported -- confirm against the target style.
@inproceedings{chang-etal-2026-cascadedebate,
  author    = {Chang, Raeyoung and
               Kwon, Dongwook and
               Lee, Jisoo and
               Verma, Nikhil},
  title     = {{CascadeDebate}: Multi-Agent Deliberation for Cost-Aware {LLM} Cascades},
  editor    = {Li, Yunyao and
               Rehm, Georg and
               Tu, Mei},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} ({ACL} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1329--1340},
  isbn      = {979-8-89176-394-4},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-industry.93/},
  abstract  = {Cascaded LLM systems coordinate models of varying sizes with human experts to balance accuracy, cost, and abstention under uncertainty. However, single-model tiers at each stage falter on ambiguous queries, triggering premature escalations to costlier models or experts due to under-confidence and inefficient compute scaling. CascadeDebate addresses this critical gap by inserting multi-agent deliberation directly at each tier{'}s escalation boundary. Confidence-based routers activate lightweight agent ensembles only for uncertain cases, enabling consensus-driven resolution of ambiguities internally, without invoking higher-cost upgrades. Our unified architecture alternates single-model inference with selective multi-agent deliberation across model scales, culminating in human experts as final fallback. This design scales test-time compute dynamically to query difficulty. Across five benchmarks spanning science, medicine, and general knowledge, CascadeDebate outperforms strong single-model cascades and standalone multi-agent systems by up to 26.75{\%}. An online threshold optimizer proves essential, boosting accuracy 20.98{--}52.33{\%} relative improvement over fixed policies and enabling elastic adaptation to real-world distributions.},
}
Markdown (Informal)
[CascadeDebate: Multi-Agent Deliberation for Cost-Aware LLM Cascades](https://preview.aclanthology.org/ingest-acl/2026.acl-industry.93/) (Chang et al., ACL 2026)
ACL