% Conference paper: CascadeDebate, in the ACL 2026 proceedings (Volume 6: Industry Track).
% Case-sensitive words in title/booktitle are protected with whole-word braces so
% sentence-casing styles keep them intact.
% NOTE(review): the url field points at a preview.aclanthology.org path, which looks
% like a staging build -- confirm and switch to the canonical Anthology URL once live.
@inproceedings{chang-etal-2026-cascadedebate,
  title     = {{CascadeDebate}: Multi-Agent Deliberation for Cost-Aware {LLM} Cascades},
  author    = {Chang, Raeyoung and
               Kwon, Dongwook and
               Lee, Jisoo and
               Verma, Nikhil},
  editor    = {Li, Yunyao and
               Rehm, Georg and
               Tu, Mei},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 6: Industry Track)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingestion-form-platform/2026.acl-industry.93/},
  pages     = {1329--1340},
  isbn      = {979-8-89176-394-4},
  abstract  = {Cascaded LLM systems coordinate models of varying sizes with human experts to balance accuracy, cost, and abstention under uncertainty. However, single-model tiers at each stage falter on ambiguous queries, triggering premature escalations to costlier models or experts due to under-confidence and inefficient compute scaling. CascadeDebate addresses this critical gap by inserting multi-agent deliberation directly at each tier{'}s escalation boundary. Confidence-based routers activate lightweight agent ensembles only for uncertain cases, enabling consensus-driven resolution of ambiguities internally, without invoking higher-cost upgrades. Our unified architecture alternates single-model inference with selective multi-agent deliberation across model scales, culminating in human experts as final fallback. This design scales test-time compute dynamically to query difficulty. Across five benchmarks spanning science, medicine, and general knowledge, CascadeDebate outperforms strong single-model cascades and standalone multi-agent systems by up to 26.75{\%}. An online threshold optimizer proves essential, boosting accuracy 20.98{--}52.33{\%} relative improvement over fixed policies and enabling elastic adaptation to real-world distributions.},
}
Markdown (Informal)
[CascadeDebate: Multi-Agent Deliberation for Cost-Aware LLM Cascades](https://preview.aclanthology.org/ingestion-form-platform/2026.acl-industry.93/) (Chang et al., ACL 2026)
ACL
- Raeyoung Chang, Dongwook Kwon, Jisoo Lee, and Nikhil Verma. 2026. CascadeDebate: Multi-Agent Deliberation for Cost-Aware LLM Cascades. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 6: Industry Track), pages 1329–1340, San Diego, California, USA. Association for Computational Linguistics.