% Conference paper entry (inproceedings) with authors, editors, venue, pages, ISBN and abstract.
% NOTE(review): the url field points at a preview/ingest host (preview.aclanthology.org);
% confirm the canonical published URL (and add a DOI if one exists) before relying on it.
@inproceedings{liu-etal-2025-adaptive,
  title     = {Adaptive Coopetition: Leveraging Coarse Verifier Signals for Resilient Multi-Agent {LLM} Reasoning},
  author    = {Liu, Wendy Yaqiao and
               Huang, Rui Jerry and
               Miin, Anastasia and
               Ding, Lei},
  editor    = {T.y.s.s, Santosh and
               Shimizu, Shuichiro and
               Gong, Yifan},
  booktitle = {The 14th International Joint Conference on Natural Language Processing and The 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics},
  month     = dec,
  year      = {2025},
  address   = {Mumbai, India},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-ijcnlp-aacl/2025.ijcnlp-srw.13/},
  pages     = {145--155},
  isbn      = {979-8-89176-304-3},
  abstract  = {Inference-time computation is a critical yet challenging paradigm for enhancing the reasoning performance of large language models (LLMs). While existing strategies improve reasoning stability and consistency, they suffer from notable limitations: self-correction often reinforces the model{'}s initial biases, and Multi-Agent Collaboration (MAC) often fails due to the lack of efficient coordination mechanisms, leading to collective errors. Although high-performing verifiers can detect reasoning errors, making them reliable requires substantial training. To address these challenges, we introduce a novel inference-time framework --- Adaptive Coopetition (AdCo) --- in which LLM agents utilize an adaptive, UCB-based `coopetition' mechanism. At each round, agents leverage coarse verifier signals to determine whether to collaborate or compete, further iteratively refining their reasoning based on peer feedback. Without relying on high-performance verifiers, our adaptive strategy achieves significant performance gains on mathematical reasoning benchmarks, yielding a 20{\%} relative improvement over baselines on the more challenging dataset. Our approach remains robust and consistent in terms of accuracy under different sample sizes and configurations. This adaptive, signal-guided `coopetition' framework enhances reasoning robustness by leveraging both model knowledge diversity and reasoning trace measure, while also promoting uncertainty-driven exploration, especially when participants have comparable capabilities. From this perspective, our work offers a fresh lens on inference-time computation and paves the way for more resilient multi-agent LLM systems.},
}
Markdown (Informal)
[Adaptive Coopetition: Leveraging Coarse Verifier Signals for Resilient Multi-Agent LLM Reasoning](https://preview.aclanthology.org/ingest-ijcnlp-aacl/2025.ijcnlp-srw.13/) (Liu et al., IJCNLP 2025)
ACL