@inproceedings{yao-yadav-2025-diverse,
title = "Diverse Multi-tool Aggregation with Large Language Models for Enhanced Math Reasoning",
author = "Yao, Bohan and
Yadav, Vikas",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/author-page-yu-wang-polytechnic/2025.findings-emnlp.1377/",
doi = "10.18653/v1/2025.findings-emnlp.1377",
pages = "25264--25282",
ISBN = "979-8-89176-335-7",
abstract = "Tool usage is a proven technique for developing high-performance reasoning in large language models (LLMs). Our work is focused on emphasizing the utility of leveraging multiple diverse tools for complex reasoning tasks. We present $\textbf{Multi-TAG}$, a $\textbf{Multi}$-$\textbf{T}$ool $\textbf{AG}$gregation-based LLM framework that utilizes multiple diverse tools to solve complex math problems over multiple reasoning steps. At each reasoning step, $\textbf{Multi-TAG}$ invokes multiple tools and accepts the solution of the respective step by tools that have majority agreement on the final answer estimate. $\textbf{Multi-TAG}$ strongly outperforms several standard baselines that use individual tools with the same number of runs, highlighting the importance of multi-tool invocation for solving complex reasoning tasks. We also show that naive aggregation of multiple tools at each reasoning step also leads to substantial improvements of up to 35{\%} accuracy. $\textbf{Multi-TAG}$ then further improves these gains by 7.4{\%} on average on MATH500, AIME, AMC, and OlympiadBench."
}

Markdown (Informal)
[Diverse Multi-tool Aggregation with Large Language Models for Enhanced Math Reasoning](https://aclanthology.org/2025.findings-emnlp.1377/) (Yao & Yadav, Findings 2025)
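For intuition, here is a minimal Python sketch of the per-step majority aggregation the abstract describes: run several tools at each reasoning step, vote on their final-answer estimates, and keep a step proposed by a tool in the majority bucket. Everything below is an illustrative assumption, not the paper's implementation; the `Tool` signature and the `multi_tool_step` and `solve` names are hypothetical.

```python
from collections import Counter
from typing import Callable, List, Tuple

# Hypothetical tool interface: given the problem and the accepted steps so far,
# a tool proposes the next step together with its current final-answer estimate.
Tool = Callable[[str, List[str]], Tuple[str, str]]

def multi_tool_step(problem: str, steps: List[str], tools: List[Tool]) -> str:
    """One aggregation step in the spirit of Multi-TAG: invoke every tool,
    vote on the final-answer estimates, and accept a step from a tool
    whose estimate agrees with the majority."""
    proposals = [tool(problem, steps) for tool in tools]
    votes = Counter(answer for _, answer in proposals)
    majority_answer, _ = votes.most_common(1)[0]
    # Return the first proposed step whose estimate matches the majority answer.
    for step, answer in proposals:
        if answer == majority_answer:
            return step
    raise RuntimeError("unreachable: the majority answer always has a proposer")

def solve(problem: str, tools: List[Tool], max_steps: int = 10) -> List[str]:
    """Build a multi-step solution by applying the aggregation step repeatedly."""
    steps: List[str] = []
    for _ in range(max_steps):
        steps.append(multi_tool_step(problem, steps, tools))
    return steps
```

In this reading, the vote is over final-answer estimates rather than over the step text itself, which is what lets heterogeneous tools (e.g., code execution vs. symbolic solvers) agree even when their intermediate steps differ in form.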