% ToolSandbox paper, Findings of the ACL: NAACL 2025 (ACL Anthology record).
% The url field uses the canonical Anthology address, not a preview-branch host.
@inproceedings{lu-etal-2025-toolsandbox,
  author    = {Lu, Jiarui and Holleis, Thomas and Zhang, Yizhe and Aumayer, Bernhard and Nan, Feng and Bai, Haoping and Ma, Shuang and Ma, Shen and Li, Mengyu and Yin, Guoli and Wang, Zirui and Pang, Ruoming},
  title     = {{ToolSandbox}: A Stateful, Conversational, Interactive Evaluation Benchmark for {LLM} Tool Use Capabilities},
  editor    = {Chiruzzo, Luis and Ritter, Alan and Wang, Lu},
  booktitle = {Findings of the Association for Computational Linguistics: NAACL 2025},
  month     = apr,
  year      = {2025},
  address   = {Albuquerque, New Mexico},
  publisher = {Association for Computational Linguistics},
  pages     = {1160--1183},
  isbn      = {979-8-89176-195-7},
  url       = {https://aclanthology.org/2025.findings-naacl.65/},
}