% MolRAG (Xian et al.), ACL 2025 long paper, Anthology ID 2025.acl-long.755.
% The url uses the canonical aclanthology.org host rather than the temporary
% ingestion-preview host; {MolRAG} is braced as a whole word so sentence-casing
% styles keep its capitalisation.
@inproceedings{xian-etal-2025-molrag,
  title     = {{MolRAG}: Unlocking the Power of Large Language Models for Molecular Property Prediction},
  author    = {Xian, Ziting and
               Gu, Jiawei and
               Li, Lingbo and
               Liang, Shangsong},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.acl-long.755/},
  pages     = {15513--15531},
  isbn      = {979-8-89176-251-0},
  abstract  = {Recent LLMs exhibit limited effectiveness on molecular property prediction task due to the semantic gap between molecular representations and natural language, as well as the lack of domain-specific knowledge. To address these challenges, we propose MolRAG, a Retrieval-Augmented Generation framework integrating Chain-of-Thought reasoning for molecular property prediction. MolRAG operates by retrieving structurally analogous molecules as contextual references to guide stepwise knowledge reasoning through chemical structure-property relationships. This dual mechanism synergizes molecular similarity analysis with structured inference, while generating human-interpretable rationales grounded in domain knowledge. Experimental results show MolRAG outperforms pre-trained LLMs on four datasets, and even matches supervised methods, achieving performance gains of 1.1\%--45.7\% over direct prediction approaches, demonstrating versatile effectiveness. Our code is available at https://github.com/AcaciaSin/MolRAG.},
}
Markdown (Informal)
[MolRAG: Unlocking the Power of Large Language Models for Molecular Property Prediction](https://aclanthology.org/2025.acl-long.755/) (Xian et al., ACL 2025)
ACL