% Workshop paper record (ACL Anthology ID 2025.trustnlp-main.20), TrustNLP 2025.
% - url uses the canonical aclanthology.org permalink, not a preview/staging branch.
% - Acronyms in booktitle are brace-protected so styles that recase keep them intact.
% - NOTE(review): editor surname is given as "Galstyan"; the Anthology export read
%   "Galystan", which looks like a transposition -- confirm against the proceedings
%   front matter.
% - address holds the conference location, following ACL Anthology convention.
@inproceedings{rabinovich-anaby-tavor-2025-robustness,
  title     = {On the Robustness of Agentic Function Calling},
  author    = {Rabinovich, Ella and
               Anaby Tavor, Ateret},
  editor    = {Cao, Trista and
               Das, Anubrata and
               Kumarage, Tharindu and
               Wan, Yixin and
               Krishna, Satyapriya and
               Mehrabi, Ninareh and
               Dhamala, Jwala and
               Ramakrishna, Anil and
               Galstyan, Aram and
               Kumar, Anoop and
               Gupta, Rahul and
               Chang, Kai-Wei},
  booktitle = {Proceedings of the 5th Workshop on Trustworthy {NLP} ({TrustNLP} 2025)},
  month     = may,
  year      = {2025},
  address   = {Albuquerque, New Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.trustnlp-main.20/},
  pages     = {298--304},
  isbn      = {979-8-89176-233-6},
  abstract  = {Large Language Models (LLMs) are increasingly acting as autonomous agents, with function calling (FC) capabilities enabling them to invoke specific tools for tasks. While prior research has primarily focused on improving FC accuracy, little attention has been given to the robustness of these agents to perturbations in their input. We introduce a benchmark assessing FC robustness in two key areas: resilience to naturalistic query variations, and stability in function calling when the toolkit expands with semantically related tools. Evaluating best-performing FC models on a carefully expanded subset of the Berkeley function calling leaderboard (BFCL), we identify critical weaknesses in existing evaluation methodologies, and highlight areas for improvement in real-world agentic deployments.},
}
Markdown (Informal)
[On the Robustness of Agentic Function Calling](https://aclanthology.org/2025.trustnlp-main.20/) (Rabinovich & Anaby Tavor, TrustNLP 2025)
ACL
- Ella Rabinovich and Ateret Anaby Tavor. 2025. On the Robustness of Agentic Function Calling. In Proceedings of the 5th Workshop on Trustworthy NLP (TrustNLP 2025), pages 298–304, Albuquerque, New Mexico. Association for Computational Linguistics.