@inproceedings{yu-etal-2017-voila,
title = "{VOILA}: An Optimised Dialogue System for Interactively Learning Visually-Grounded Word Meanings (Demonstration System)",
author = "Yu, Yanchao and
Eshghi, Arash and
Lemon, Oliver",
editor = "Jokinen, Kristiina and
Stede, Manfred and
DeVault, David and
Louis, Annie",
booktitle = "Proceedings of the 18th Annual {SIG}dial Meeting on Discourse and Dialogue",
month = aug,
year = "2017",
    address = "Saarbr{\"u}cken, Germany",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/W17-5524/",
doi = "10.18653/v1/W17-5524",
pages = "197--200",
abstract = "We present VOILA: an optimised, multi-modal dialogue agent for interactive learning of visually grounded word meanings from a human user. VOILA is: (1) able to learn new visual categories interactively from users from scratch; (2) trained on real human-human dialogues in the same domain, and so is able to conduct natural spontaneous dialogue; (3) optimised to find the most effective trade-off between the accuracy of the visual categories it learns and the cost it incurs to users. VOILA is deployed on Furhat, a human-like, multi-modal robot head with back-projection of the face, and a graphical virtual character."
}
Markdown (Informal)
[VOILA: An Optimised Dialogue System for Interactively Learning Visually-Grounded Word Meanings (Demonstration System)](https://preview.aclanthology.org/jlcl-multiple-ingestion/W17-5524/) (Yu et al., SIGDIAL 2017)