@comment{
  Conference paper in the Findings of ACL 2026 volume; introduces the
  SupChain-Bench benchmark and the SupChain-ReAct framework (see abstract).
  NOTE(review): the url field targets the preview.aclanthology.org ingest
  host -- confirm whether a permanent link should replace it.
}
@inproceedings{guan-etal-2026-supchain,
  title     = {{SupChain-Bench}: Benchmarking Large Language Models for Real-World Supply Chain Management},
  author    = {Guan, Shengyue and
               Liu, Yihao and
               Cao, Lang},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.371/},
  pages     = {7526--7550},
  isbn      = {979-8-89176-395-1},
  abstract  = {Large language models (LLMs) have shown promise in complex reasoning and tool-based decision making, motivating their application to real-world supply chain management. However, supply chain workflows require reliable long-horizon, multi-step orchestration grounded in domain-specific procedures, which remains challenging for current models. To systematically evaluate LLM performance in this setting, we introduce SupChain-Bench, a unified real-world benchmark that assesses both supply chain domain knowledge and long-horizon tool-based orchestration grounded in standard operating procedures (SOPs). Our experiments reveal substantial gaps in execution reliability across models. We further propose SupChain-ReAct, an SOP-free framework that autonomously synthesizes executable procedures for tool use, achieving the strongest and most consistent tool-calling performance. Our work establishes a principled benchmark for studying reliable long-horizon orchestration in real-world operational settings and highlights significant room for improvement in LLM-based supply chain agents.},
}
Markdown (Informal)
[SupChain-Bench: Benchmarking Large Language Models for Real-World Supply Chain Management](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.371/) (Guan et al., Findings 2026)
ACL