% ToolSandbox paper, Findings of NAACL 2025 (ACL Anthology ID 2025.findings-naacl.65).
% The url field uses the canonical aclanthology.org address rather than a
% preview-branch mirror, so the link stays valid after the branch is removed.
@inproceedings{lu-etal-2025-toolsandbox,
    title = "{ToolSandbox}: A Stateful, Conversational, Interactive Evaluation Benchmark for {LLM} Tool Use Capabilities",
    author = "Lu, Jiarui  and
      Holleis, Thomas  and
      Zhang, Yizhe  and
      Aumayer, Bernhard  and
      Nan, Feng  and
      Bai, Haoping  and
      Ma, Shuang  and
      Ma, Shen  and
      Li, Mengyu  and
      Yin, Guoli  and
      Wang, Zirui  and
      Pang, Ruoming",
    editor = "Chiruzzo, Luis  and
      Ritter, Alan  and
      Wang, Lu",
    booktitle = "Findings of the Association for Computational Linguistics: NAACL 2025",
    month = apr,
    year = "2025",
    address = "Albuquerque, New Mexico",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.findings-naacl.65/",
    pages = "1160--1183",
    isbn = "979-8-89176-195-7"
}