% ACL 2026 long paper introducing the NL => Schedule benchmark and the Mans framework.
% The math arrow and the benchmark name in the title are brace-protected so that
% styles which sentence-case titles do not lowercase \Rightarrow or "Schedule".
@inproceedings{liao-etal-2026-nl,
  title     = {{NL} {$\Rightarrow$} {Schedule}: Evaluate Multitask Scheduling Capability of Large Language Models},
  author    = {Liao, Wenrui and
               Du, Weihong and
               Li, Yi and
               Liang, Hongru and
               Lei, Wenqiang},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-long.1648/},
  pages     = {35620--35640},
  isbn      = {979-8-89176-390-6},
  abstract  = {Automated schedule generation for multitask from natural language descriptions has huge potential in modern industry. While classic methods bypass language complexities by using pre-formatted matrices, and recent LLM+solver approaches introduce new fragilities by relying on solver-specific code generation. This raises critical questions: Can large language models (LLMs) solve this NL $\Rightarrow$ Schedule task end-to-end well(RQ1)? If the answer is ``no'', where do they fall short(RQ2)? And how can their capabilities be enhanced (RQ3)? To answer these questions, we introduce NL $\Rightarrow$ Schedule, the first benchmark for this task, equipped with a dataset of 240 description-schedule pairs constructed from real-world materials and a rigorous evaluation suite. Our evaluation of nine state-of-the-art LLMs reveals the limitations of different LLMs in procedure grounding and the strengths of advanced LLMs in global planning via local analysis. To address these shortcomings, we propose Mans, a novel multi-agent framework. Extensive experiments show that Mans achieves more robust performance comparable to six state-of-the-art LLM+solver methods. We hope NL $\Rightarrow$ Schedule and Mans will serve as a solid foundation for automatic scheduling.},
}
Markdown (Informal)
[NL ⇒ Schedule: Evaluate Multitask Scheduling Capability of Large Language Models](https://preview.aclanthology.org/ingest-acl/2026.acl-long.1648/) (Liao et al., ACL 2026)
ACL