% Findings of ACL 2026 paper presenting CheMM-R1 and the CheMM-Bench benchmark.
% The model name is braced as one whole word so that sentence-casing styles
% keep "CheMM-R1" intact; with per-letter braces the "R" after the hyphen is
% unprotected and would be lowercased.
% NOTE(review): the url field points at a preview/ingest host -- confirm the
% canonical Anthology URL before relying on it.
@inproceedings{huang-etal-2026-chemm,
  title     = {{CheMM-R1}: Enhancing Chemical Structure Recognition and Elucidation with Reasoning Multimodal Large Language Models},
  author    = {Huang, Liting and
               Zhang, Zhihao and
               Wang, Shoujin},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1341/},
  pages     = {26902--26921},
  isbn      = {979-8-89176-395-1},
  abstract  = {While Multimodal Large Language Models (MLLMs) demonstrate strong reasoning capabilities, they lack domain-specific expertise to effectively perform chemical tasks. For example, existing MLLMs struggle with both the lower-level task of molecular structure recognition and the higher-level task of chemical spectral data elucidation. When faced with complex molecular structures and multimodal chemical data (including spectral images and texts), they often fail to provide reliable inference, resulting in poor performance. Moreover, there are no benchmark datasets for evaluating multi-step multimodal reasoning capacities in the chemistry domain. To this end, we establish CheMM-Bench, a comprehensive benchmark dataset with 48,500 reasoning steps across four chemical tasks (SmilesQA, IupacQA, MwQA, SpectraQA) for evaluating visual reasoning in both molecular structure recognition and spectral analysis. On top of this, we present CheMM-R1, a state-of-the-art chemistry-specific MLLM trained with CheMMGRPO, a novel adaptation of Group Relative Policy Optimisation tailored for chemical reasoning. CheMMGRPO employs domain-specific reward functions to assess chemical validity, structural accuracy, format compliance, and factual correctness. CheMM-R1 surpasses leading proprietary models (GPT-o3, Gemini-2.5-Pro, Claude-3.5-Sonnet, and Grok-2) across all CheMM-Bench tasks. The evaluation code and model are publicly available.},
}
Markdown (Informal)
[CheMM-R1: Enhancing Chemical Structure Recognition and Elucidation with Reasoning Multimodal Large Language Models](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1341/) (Huang et al., Findings 2026)
ACL