@inproceedings{zhang-xiong-2025-debate4math,
  title     = {{Debate4MATH}: Multi-Agent Debate for Fine-Grained Reasoning in Math},
  author    = {Zhang, Shaowei and
               Xiong, Deyi},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2025},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-acl.862/},
  pages     = {16810--16824},
  isbn      = {979-8-89176-256-5},
  abstract  = {Large language models (LLMs) have demonstrated impressive performance in reasoning. However, existing data annotation methods usually suffer from high annotation cost and the lack of effective automatic validation. To address these issues, we propose a Fine-grained Multi-Agent Debate framework (FMAD) and MMATH-Data, a dataset created by FMAD, which consists of 46K reasoning steps. By prompting multiple agents to debate, FMAD assesses the contribution of each reasoning step to the final solution, with labels based on the judge{'}s confidence score and the winner{'}s position. To facilitate reasoning in math and examine FMAD and MMATH-Data, we further propose two key components: a Multi-Agent Debate Reward Model (MRM) trained on MMATH-Data, which serves as a reward model to provide robust feedback during the optimization process, and MMATH-LLM, a model designed specifically for mathematical reasoning. MMATH-LLM is fine-tuned using reinforcement learning with supervised feedback from MRM, aiming at improving its mathematical reasoning capabilities. Extensive experiments demonstrate that our model achieves 83.4{\%} accuracy on the GSM8K dataset and 45.1{\%} on the MATH dataset, outperforming the state-of-the-art methods by 1.2{\%} and 3.5{\%}, respectively. All data and code will be available soon at GitHub.},
}
Markdown (Informal)
[Debate4MATH: Multi-Agent Debate for Fine-Grained Reasoning in Math](https://aclanthology.org/2025.findings-acl.862/) (Zhang & Xiong, Findings of ACL 2025)
ACL