@inproceedings{luo-etal-2025-mmath,
title = "{MMATH}: A Multilingual Benchmark for Mathematical Reasoning",
author = "Luo, Wenyang and
Zhao, Xin and
Sha, Jing and
Wang, Shijin and
Wen, Ji-Rong",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/author-page-yu-wang-polytechnic/2025.findings-emnlp.598/",
doi = "10.18653/v1/2025.findings-emnlp.598",
pages = "11187--11202",
ISBN = "979-8-89176-335-7",
abstract = "The advent of large reasoning models, such as OpenAI o1 and DeepSeek R1, has significantly advanced complex reasoning tasks. However, their capabilities in multilingual complex reasoning remain underexplored, with existing efforts largely focused on simpler tasks like MGSM. To address this gap, we introduce \textbf{}, a benchmark for multilingual complex reasoning spanning 374 high-quality math problems across 10 typologically diverse languages. Using , we observe that even advanced models like DeepSeek R1 exhibit substantial performance disparities across languages and suffer from a critical \textit{off-target} issue{---}generating responses in unintended languages. To address this, we explore strategies including prompting and training, demonstrating that reasoning in English and answering in target languages can simultaneously enhance performance and preserve target-language consistency. Our findings offer new insights and practical strategies for advancing the multilingual reasoning capabilities of large language models. Our code and data could be found at \url{https://github.com/RUCAIBox/MMATH}."
}