% Conference paper (Findings of ACL 2026) describing DiaFORGE, a
% disambiguation-centric fine-tuning pipeline for enterprise tool-calling LLMs.
% The abstract is stored as plain text: Markdown emphasis markers were removed
% so that styles which print the abstract do not emit literal asterisks.
% NOTE(review): the url below looks like a pre-publication preview/ingest
% address; confirm and swap in the permanent Anthology URL once it exists.
@inproceedings{hathidara-etal-2026-disambiguation,
  title     = {Disambiguation-Centric Finetuning Makes Enterprise Tool-Calling {LLMs} More Realistic and Less Risky},
  author    = {Hathidara, Ashutosh and
               Yu, Julien and
               Schreiber, Sebastian},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.469/},
  pages     = {9624--9652},
  isbn      = {979-8-89176-395-1},
  abstract  = {Large language models (LLMs) are increasingly tasked with invoking enterprise APIs, yet they routinely falter when near-duplicate tools vie for the same user intent or when required arguments are left underspecified. We introduce DiaFORGE (Dialogue Framework for Organic Response Generation Evaluation), a disambiguation-centric, three-stage pipeline that (i) synthesizes persona-driven, multi-turn dialogues in which the assistant must distinguish among highly similar tools, (ii) performs supervised fine-tuning of open-source models with reasoning traces across 3B--70B parameters, and (iii) evaluates real-world readiness via a dynamic suite that redeploys each model in a live agentic loop and reports end-to-end goal completion alongside conventional static metrics. On our dynamic benchmark DiaBENCH, models trained with DiaFORGE raise tool-invocation success by 27 pp over GPT-4o and by 49 pp over Claude-3.5-Sonnet, both under optimized prompting. To spur further research, we release an open corpus of 5000 production-grade enterprise API specifications paired with rigorously validated, disambiguation-focused dialogues, offering a practical blueprint for building reliable, enterprise-ready tool-calling agents.},
}
Markdown (Informal)
[Disambiguation-Centric Finetuning Makes Enterprise Tool-Calling LLMs More Realistic and Less Risky](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.469/) (Hathidara et al., Findings 2026)
ACL