@inproceedings{yang-etal-2026-business,
  title     = {Business as Rulesual: A Benchmark and Framework for Business Rule Flow Modeling with {LLM}s},
  author    = {Yang, Chen and
               Xu, Ruping and
               Li, Ruizhe and
               Cao, Bin and
               Fan, Jing},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-long.1625/},
  pages     = {35198--35220},
  isbn      = {979-8-89176-390-6},
  abstract  = {Extracting structured procedural knowledge from unstructured business documents is a critical yet unresolved bottleneck in process automation. While prior work has focused on extracting linear action flows from instructional texts (e.g., recipes), it has insufficiently addressed the complex logical structures{---}such as conditional branching and parallel execution{---}that are pervasive in real-world regulatory and administrative documents. Furthermore, existing benchmarks are limited by simplistic schemas and shallow logical dependencies, restricting progress toward logic-aware large language models (LLMs). To bridge this ``Logic Gap'', we introduce $\textbf{BREX}$, a carefully curated benchmark comprising 409 real-world business documents and 2,855 expert-annotated rules. Unlike prior datasets centered on narrow service scenarios, BREX spans over 30 vertical domains, covering scientific, industrial, administrative, and financial regulations. We further propose $\textbf{ExIde}$, a structure-aware reasoning framework that investigates five distinct prompting strategies, ranging from implicit semantic alignment to executable grounding via pseudo-code generation, enabling explicit modeling of rule dependencies and providing an out-of-the-box framework for different business customers without finetuning their own LLMs. We benchmark ExIde using 13 state-of-the-art LLMs. Our extensive evaluation reveals that: (1) Executable grounding serves as a superior inductive bias, significantly outperforming standard prompts in rule extraction; and (2) Reasoning-optimized models demonstrate a distinct advantage in tracing long-range dependencies and non-linear rule dependencies compared to standard instruction-tuned models.},
}
Markdown (Informal)
[Business as Rulesual: A Benchmark and Framework for Business Rule Flow Modeling with LLMs](https://preview.aclanthology.org/ingest-acl/2026.acl-long.1625/) (Yang et al., ACL 2026)
ACL