% EMNLP 2024 main-conference paper; ACL Anthology ID 2024.emnlp-main.637.
% The url field uses the canonical Anthology permalink (same ID as the DOI suffix).
@inproceedings{zhang-etal-2024-toolbehonest,
    title = "{ToolBeHonest}: A Multi-level Hallucination Diagnostic Benchmark for Tool-Augmented Large Language Models",
    author = "Zhang, Yuxiang  and
      Chen, Jing  and
      Wang, Junjie  and
      Liu, Yaxin  and
      Yang, Cheng  and
      Shi, Chufan  and
      Zhu, Xinyu  and
      Lin, Zihao  and
      Wan, Hanwen  and
      Yang, Yujiu  and
      Sakai, Tetsuya  and
      Feng, Tian  and
      Yamana, Hayato",
    editor = "Al-Onaizan, Yaser  and
      Bansal, Mohit  and
      Chen, Yun-Nung",
    booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
    month = nov,
    year = "2024",
    address = "Miami, Florida, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.emnlp-main.637/",
    doi = "10.18653/v1/2024.emnlp-main.637",
    pages = "11388--11422"
}