@inproceedings{m-bran-etal-2024-knowledge,
title = "Knowledge Graph Extraction from Total Synthesis Documents",
author = "M Bran, Andres and
Jon{\v{c}}ev, Zlatko and
Schwaller, Philippe",
editor = "Edwards, Carl and
Wang, Qingyun and
Li, Manling and
Zhao, Lawrence and
Hope, Tom and
Ji, Heng",
booktitle = "Proceedings of the 1st Workshop on Language + Molecules (L+M 2024)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2024.langmol-1.9/",
doi = "10.18653/v1/2024.langmol-1.9",
pages = "74--84",
abstract = "Knowledge graphs (KGs) have emerged as a powerful tool for organizing and integrating complex information, making it a suitable format for scientific knowledge. However, translating scientific knowledge into KGs is challenging as a wide variety of styles and elements to present data and ideas is used. Although efforts for KG extraction (KGE) from scientific documents exist, evaluation remains challenging and field-dependent; and existing benchmarks do not focuse on scientific information. Furthermore, establishing a general benchmark for this task is challenging as not all scientific knowledge has a ground-truth KG representation, making any benchmark prone to ambiguity. Here we propose Graph of Organic Synthesis Benchmark (GOSyBench), a benchmark for KG extraction from scientific documents in chemistry, that leverages the native KG-like structure of synthetic routes in organic chemistry. We develop KG-extraction algorithms based on LLMs (GPT-4, Claude, Mistral) and VLMs (GPT-4o), the best of which reaches 73{\%} recovery accuracy and 59{\%} precision, leaving a lot of room for improvement. We expect GOSyBench can serve as a valuable resource for evaluating and advancing KGE methods in the scientific domain, ultimately facilitating better organization, integration, and discovery of scientific knowledge."
}
Markdown (Informal)
[Knowledge Graph Extraction from Total Synthesis Documents](https://preview.aclanthology.org/fix-sig-urls/2024.langmol-1.9/) (M Bran et al., LangMol 2024)
ACL