% STEM-POM (Zou et al.), Findings of ACL 2025.
% The url field uses the canonical ACL Anthology address for paper
% 2025.findings-acl.429 instead of a temporary preview-branch build.
@inproceedings{zou-etal-2025-stem,
  title     = {{STEM}-{POM}: Evaluating Language Models Math-Symbol Reasoning in Document Parsing},
  author    = {Zou, Jiaru and
               Wang, Qing and
               Thakur, Pratyush and
               Kani, Nickvash},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2025},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-acl.429/},
  pages     = {8183--8199},
  isbn      = {979-8-89176-256-5},
  abstract  = {Advances in large language models (LLMs) have spurred research into enhancing their reasoning capabilities, particularly in math-rich STEM (Science, Technology, Engineering, and Mathematics) documents. While LLMs can generate equations or solve math-related queries, their ability to fully understand and interpret abstract mathematical symbols in long, math-rich documents remains limited. In this paper, we introduce STEM-PoM, a comprehensive benchmark dataset designed to evaluate LLMs' reasoning abilities on math symbols within contextual scientific text. The dataset, sourced from real-world ArXiv documents, contains over 2K math symbols classified as main attributes of variables, constants, operators, and unit descriptors, with additional sub-attributes including scalar/vector/matrix for variables and local/global/discipline-specific labels for both constants and operators. Our extensive experiments demonstrate that state-of-the-art LLMs achieve an average accuracy of 20--60{\%} under in-context learning and 50--60{\%} with fine-tuning, highlighting a substantial gap in their ability to classify mathematical symbols. By improving LLMs' mathematical symbol classification, STEM-PoM further enhances models' downstream mathematical reasoning capabilities. The code and data are available at https://github.com/jiaruzouu/STEM-PoM.},
}
Markdown (Informal)
[STEM-POM: Evaluating Language Models Math-Symbol Reasoning in Document Parsing](https://aclanthology.org/2025.findings-acl.429/) (Zou et al., Findings 2025)
ACL