% Conference-paper record for the Graph Explorer paper (Findings of ACL 2026).
% The system name in the title is brace-protected so sentence-casing styles keep its capitals.
% NOTE(review): the url points at the preview.aclanthology.org ingest branch; confirm
% whether the canonical Anthology URL should replace it before long-term use.
% NOTE(review): the abstract phrase "issues a valid within budget" appears to have lost
% a word during export; verify against the published abstract before reusing the text.
@inproceedings{chen-etal-2026-graph,
    title = "{Graph Explorer}: Training Faithful {KG} Agents with Visibility-Grounded Supervision",
    author = "Chen, Yifeng and
      Wan, Sicheng and
      Zhang, Tianyi and
      Zhang, Xuezhou",
    editor = "Liakata, Maria and
      Moreira, Viviane P. and
      Zhang, Jiajun and
      Jurgens, David",
    booktitle = "Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026",
    month = jul,
    year = "2026",
    address = "San Diego, California, United States",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl/2026.findings-acl.387/",
    pages = "7875--7890",
    isbn = "979-8-89176-395-1",
    abstract = "Large language models (LLMs) are strong reasoners but still hallucinate and make unreliable decisions on knowledge-intensive questions. Knowledge graphs (KGs) provide explicit, auditable facts, motivating KGQA agents that interact with KGs via tool calls to reduce hallucinations. However, LLM agents often struggle to reliably manipulate KG-specific symbols (entity IDs and relation names), leading to invalid or hallucinated tool-call arguments, and high-quality step-by-step supervision for such tool use is scarce. Meanwhile, large datasets of expert SPARQL programs exist for Freebase KGQA, but naively converting them into action supervision is brittle: SPARQL assumes a global view of the KG, while an agent acts from a truncated, local prompt, so expert steps can reference KG IDs (entity/relation/attribute symbols) that are not visible at decision time. We present Graph Explorer, a fully automatic data synthesis pipeline that turns expert SPARQL into executable, visibility-grounded (actions may use only IDs shown in the prompt) tool supervision without manual trace labeling. Graph Explorer compiles SPARQL into tool-call plans, executes them under the same context-control policy used at inference, and retains only tool-interaction traces whose tool-call arguments are visible at decision time, yielding clean (context, next-action) pairs for action-centric fine-tuning. We evaluate with a strict finish-or-fail protocol (success only if the agent issues a valid within budget). Under this protocol, our fine-tuned Qwen3-8B reaches 74.0/80.2 Hit@1 on CWQ/WebQSP, improving over a reproduced prompting baseline by +22.5/+16.2 points, indicating more faithful multi-step graph exploration from visible evidence."
}
Markdown (Informal)
[Graph Explorer: Training Faithful KG Agents with Visibility-Grounded Supervision](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.387/) (Chen et al., Findings 2026)
ACL