@inproceedings{demirci-etal-2025-llms,
title = "Are {LLM}s Truly Graph-Savvy? A Comprehensive Evaluation of Graph Generation",
author = "Demirci, Ege and
Kerur, Rithwik and
Singh, Ambuj",
editor = "Zhao, Jin and
Wang, Mingyang and
Liu, Zhu",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingestion-acl-25/2025.acl-srw.64/",
pages = "884--897",
ISBN = "979-8-89176-254-1",
abstract = "While large language models (LLMs) have demonstrated impressive capabilities across diverse tasks, their ability to generate valid graph structures remains underexplored. We evaluate fifteen state-of-the-art LLMs on five specialized graph generation tasks spanning delivery networks, social networks, quantum circuits, gene-disease networks, and transportation systems. We also test the LLMs using 3 different prompt types: direct, iterative feedback, and program-augmented. Models supported with explicit reasoning modules (o3-mini-high, o1, Claude 3.7 Sonnet, DeepSeek-R1) solve more than twice as many tasks as their general-purpose peers, independent of parameter count. Error forensics reveals two recurring failure modes: smaller parameter size Llama models often violate basic structural constraints, whereas Claude models respect topology but mismanage higher-order logical rules. Allowing models to refine their answers iteratively yields uneven gains, underscoring fundamental differences in error-correction capacity. This work demonstrates that graph competence stems from specialized training methodologies rather than scale, establishing a framework for developing truly graph-savvy language models. Results and verification scripts available at https://github.com/egedemirci/Are-LLMs-Truly-Graph-Savvy-A-Comprehensive-Evaluation-of-Graph-Generation."
}