% ACL Anthology record 2025.findings-ijcnlp.22.
% The url field uses the canonical Anthology address; the original export pointed
% at a temporary "ingest-ijcnlp-aacl" preview build, which is not a stable link.
% NOTE(review): "Ryonosuke, Koriki" may have family and given names swapped in the
% upstream metadata -- confirm against the paper itself before changing it.
@inproceedings{mizutani-etal-2025-enhancing,
  title     = {Enhancing {LLM}-Based Molecular Captioning with Molecular Fingerprints},
  author    = {Mizutani, Keisuke and
               Ryonosuke, Koriki and
               Tokuyama, Kento},
  editor    = {Inui, Kentaro and
               Sakti, Sakriani and
               Wang, Haofen and
               Wong, Derek F. and
               Bhattacharyya, Pushpak and
               Banerjee, Biplab and
               Ekbal, Asif and
               Chakraborty, Tanmoy and
               Singh, Dhirendra Pratap},
  booktitle = {Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics},
  month     = dec,
  year      = {2025},
  address   = {Mumbai, India},
  publisher = {The Asian Federation of Natural Language Processing and The Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-ijcnlp.22/},
  pages     = {397--410},
  isbn      = {979-8-89176-303-6},
  abstract  = {The development of large language models (LLMs) has resulted in significant transformations in the field of chemistry, with potential applications in molecular science. Traditionally, the exploration of methods to enhance pre-trained general-purpose LLMs has focused on techniques like supervised fine-tuning (SFT) and retrieval-augmented generation (RAG), to improve model performance and tailor them to specific applications. General purpose extended approaches are being researched, but their adaptation within the chemical domain has not progressed significantly. This study advances the application of LLMs in molecular science by exploring SFT of LLMs, and developing RAG and multimodal models, incorporating molecular embeddings derived from molecular fingerprints and other properties. Experimental results show that a multimodal model with fingerprint inputs to the LLM achieved the highest overall performance. For molecular representation based on SMILES notation, fingerprints effectively capture the structural information of molecular compounds, demonstrating the applicability of LLMs in drug discovery research.},
}

Markdown (Informal)
[Enhancing LLM-Based Molecular Captioning with Molecular Fingerprints](https://aclanthology.org/2025.findings-ijcnlp.22/) (Mizutani et al., Findings 2025)
ACL
- Keisuke Mizutani, Koriki Ryonosuke, and Kento Tokuyama. 2025. Enhancing LLM-Based Molecular Captioning with Molecular Fingerprints. In Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics, pages 397–410, Mumbai, India. The Asian Federation of Natural Language Processing and The Association for Computational Linguistics.