@comment{SIGDIAL 2021 paper 41, pages 392--402. The url field holds the canonical ACL Anthology address rather than a preview-branch build, and the braced words in the title keep their capitalisation under sentence-casing styles.}
@inproceedings{dogruoz-skantze-2021-open,
  title     = {How {\textquotedblleft}open{\textquotedblright} are the conversations with open-domain chatbots? {A} proposal for {Speech} {Event} based evaluation},
  author    = {Do{\u{g}}ru{\"o}z, A. Seza and
               Skantze, Gabriel},
  editor    = {Li, Haizhou and
               Levow, Gina-Anne and
               Yu, Zhou and
               Gupta, Chitralekha and
               Sisman, Berrak and
               Cai, Siqi and
               Vandyke, David and
               Dethlefs, Nina and
               Wu, Yan and
               Li, Junyi Jessy},
  booktitle = {Proceedings of the 22nd Annual Meeting of the Special Interest Group on Discourse and Dialogue},
  month     = jul,
  year      = {2021},
  address   = {Singapore and Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.sigdial-1.41/},
  doi       = {10.18653/v1/2021.sigdial-1.41},
  pages     = {392--402},
  abstract  = {Open-domain chatbots are supposed to converse freely with humans without being restricted to a topic, task or domain. However, the boundaries and/or contents of open-domain conversations are not clear. To clarify the boundaries of {\textquotedblleft}openness{\textquotedblright}, we conduct two studies: First, we classify the types of {\textquotedblleft}speech events{\textquotedblright} encountered in a chatbot evaluation data set (i.e., Meena by Google) and find that these conversations mainly cover the {\textquotedblleft}small talk{\textquotedblright} category and exclude the other speech event categories encountered in real life human-human communication. Second, we conduct a small-scale pilot study to generate online conversations covering a wider range of speech event categories between two humans vs. a human and a state-of-the-art chatbot (i.e., Blender by Facebook). A human evaluation of these generated conversations indicates a preference for human-human conversations, since the human-chatbot conversations lack coherence in most speech event categories. Based on these results, we suggest (a) using the term {\textquotedblleft}small talk{\textquotedblright} instead of {\textquotedblleft}open-domain{\textquotedblright} for the current chatbots which are not that {\textquotedblleft}open{\textquotedblright} in terms of conversational abilities yet, and (b) revising the evaluation methods to test the chatbot conversations against other speech events.},
}
Markdown (Informal)
[How “open” are the conversations with open-domain chatbots? A proposal for Speech Event based evaluation](https://aclanthology.org/2021.sigdial-1.41/) (Doğruöz & Skantze, SIGDIAL 2021)
ACL