% MolSafeEval paper, Findings of ACL 2026 (record exported from the ACL Anthology).
% Case-sensitive words in title/booktitle are protected with whole-word braces.
% NOTE(review): url points at a preview/ingest host -- confirm the canonical
% Anthology URL (and DOI, if one is assigned) before relying on it.
@inproceedings{xu-etal-2026-molsafeeval,
  title     = {{MolSafeEval}: A Benchmark for Uncovering Safety Risks in {AI}-Generated Molecules},
  author    = {Xu, Tong and
               Cao, Xinzhe and
               Zhu, Zhihui and
               Ding, Keyan and
               Chen, Huajun},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1679/},
  pages     = {33621--33648},
  isbn      = {979-8-89176-395-1},
  abstract  = {Current molecular generation benchmarks emphasize task complexity, molecule novelty, and property alignment; they largely overlook a critical concern: the potential safety risks of AI-generated molecules. In practice, many generative models may produce molecules with toxic, reactive, or otherwise hazardous characteristics{---}posing hidden dangers that remain insufficiently addressed. To address this gap, we introduce MolSafeEval, a benchmark dedicated to evaluating and analyzing the safety risks of molecular generation. Unlike prior approaches that rely on narrow toxicity predictors, MolSafeEval integrates heterogeneous safety knowledge{---}ranging from toxicological databases to hazard rules{---}into a structured molecular safety knowledge graph. This graph serves as a foundation for large language model{--}based reasoning, enabling systematic detection and explanation of unsafe features in generated compounds. We further categorize molecular generative models into four representative task types{---}unconditional generation, property optimization, target protein{--}based design, and text-based generation{---}and provide standardized datasets and safety evaluation protocols for each.},
}
Markdown (Informal)
[MolSafeEval: A Benchmark for Uncovering Safety Risks in AI-Generated Molecules](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1679/) (Xu et al., Findings 2026)
ACL