% Workshop paper, ACL Anthology ID 2026.iwsds-1.26 (text outside an entry is ignored by BibTeX).
@inproceedings{garcia-anakabe-etal-2026-adding,
  title     = {Adding Determinism to a Dialogue Agent for a Robotic Environment},
  author    = {Garcia Anakabe, Oihana and
               Cocola, Riccardo and
               Aceta, Cristina},
  editor    = {Riccardi, Giuseppe and
               Mousavi, Seyed Mahed and
               Torres, Maria Ines and
               Yoshino, Koichiro and
               Callejas, Zoraida and
               Chowdhury, Shammur Absar and
               Chen, Yun-Nung and
               Bechet, Frederic and
               Gustafson, Joakim and
               Damnati, G{\'e}raldine and
               Papangelis, Alex and
               D{'}Haro, Luis Fernando and
               Mendon{\c{c}}a, John and
               Bernardi, Raffaella and
               Hakkani-Tur, Dilek and
               Di Fabbrizio, Giuseppe {``}Pino{''} and
               Kawahara, Tatsuya and
               Alam, Firoj and
               Tur, Gokhan and
               Johnston, Michael},
  booktitle = {Proceedings of the 16th International Workshop on Spoken Dialogue System Technology},
  month     = feb,
  year      = {2026},
  address   = {Trento, Italy},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.iwsds-1.26/},
  pages     = {253--261},
  abstract  = {Large Language Models ({LLM}s) have strong capabilities in natural dialogue, but their inherent indeterminacy presents challenges in robotic environments where safety and reliability are critical. In this study, we propose a dialogue agent that has been developed to guide and support human operators during robot demonstrations, following the Learning from Demonstration ({LfD}) paradigm, where the robot learns tasks from the operator{'}s actions. The agent presented in this work extends the standard prompt-based {LLM} setup by integrating state graphs that explicitly encode dialogue states and transitions. This structure ensures that user interactions follow the intended path, while still allowing users to communicate in a flexible and natural manner. The state graph agent is benchmarked against a monolithic prompt baseline in challenging dialogue scenarios involving ambiguity, incomplete actions, or operator errors. Despite the {LLM} prompt achieving good standalone performance, the state-controlled agent shows greater contextual understanding, reasoning capability, and advisory performance, leading to more intelligent and reliable interactions.},
}