% ToolBeHonest, in the EMNLP 2024 proceedings (Anthology ID 2024.emnlp-main.637, same ID as in the DOI).
% The url field uses the canonical aclanthology.org host; preview.aclanthology.org
% paths are per-branch staging builds and are not expected to stay reachable.
@inproceedings{zhang-etal-2024-toolbehonest,
  title     = {{ToolBeHonest}: A Multi-level Hallucination Diagnostic Benchmark for Tool-Augmented Large Language Models},
  author    = {Zhang, Yuxiang and
               Chen, Jing and
               Wang, Junjie and
               Liu, Yaxin and
               Yang, Cheng and
               Shi, Chufan and
               Zhu, Xinyu and
               Lin, Zihao and
               Wan, Hanwen and
               Yang, Yujiu and
               Sakai, Tetsuya and
               Feng, Tian and
               Yamana, Hayato},
  editor    = {Al-Onaizan, Yaser and
               Bansal, Mohit and
               Chen, Yun-Nung},
  booktitle = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2024},
  address   = {Miami, Florida, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.emnlp-main.637/},
  doi       = {10.18653/v1/2024.emnlp-main.637},
  pages     = {11388--11422},
}