@inproceedings{kim-etal-2026-ontology,
title = "Ontology-Free General-Domain Knowledge Graph-to-Text Generation Dataset Synthesis using Large Language Model",
author = "Kim, Daehui and
Kang, Deokhyung and
Ryu, Sangwon and
Lee, Gary",
editor = "Gupta, Vivek and
Ding, Kaize and
Kokel, Harsha and
Zhao, Yue and
Agarwal, Amit and
Wang, Yu and
Glass, Michael and
Zhang, Yu and
Srinivas, Kavitha and
Chen, Xiusi and
Hassanzadeh, Oktie and
Zhu, Qi and
Chang, Shuaichen and
Luo, Yuan",
booktitle = "Proceedings of the First Workshop on Structured Understanding, Retrieval, and Generation in the {LLM} Era ({SURG}e{LLM} 2026)",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.surgellm-1.3/",
pages = "52--69",
ISBN = "979-8-89176-406-4",
abstract = "Knowledge Graph-to-Text (G2T) generation involves verbalizing structured knowledge graphs into natural language text. Recent advancements in Pretrained Language Models (PLMs) have improved G2T performance, but their effectiveness relies on datasets with precise graph-text alignment. However, the scarcity of high-quality, general-domain G2T generation datasets restricts progress in the general-domain G2T generation research. To address this issue, we introduce Wikipedia Ontology-Free Graph-text dataset (WikiOFGraph), a new large-scale G2T dataset generated using a novel method that leverages Large Language Models (LLMs) and Data-QuestEval. Our dataset, which contains 5.85M general-domain graph-text pairs, offers high graph-text consistency without reliance on external ontologies. Experimental results demonstrate that PLM fine-tuned on WikiOFGraph outperforms those trained on other datasets across various evaluation metrics. Our method proves to be a scalable and effective solution for generating high-quality G2T data, significantly advancing the field of G2T generation."
}