@comment{
  Mehri and Eskenazi, SIGDIAL 2020: conference paper introducing the FED
  metric and the FED dataset (see the abstract field).
  Notes for maintainers:
  - url is the canonical ACL Anthology address, not a preview-branch link.
  - The booktitle ordinal is spelled 21st; the Anthology export reads 21th.
  - DialoGPT is braced as a whole word so that sentence-casing styles keep
    its capitalisation.
  - address carries the event description from the Anthology export, not a
    publisher city; TODO confirm whether the target style should print it.
}
@inproceedings{mehri-eskenazi-2020-unsupervised,
  title     = {Unsupervised Evaluation of Interactive Dialog with {DialoGPT}},
  author    = {Mehri, Shikib and
               Eskenazi, Maxine},
  editor    = {Pietquin, Olivier and
               Muresan, Smaranda and
               Chen, Vivian and
               Kennington, Casey and
               Vandyke, David and
               Dethlefs, Nina and
               Inoue, Koji and
               Ekstedt, Erik and
               Ultes, Stefan},
  booktitle = {Proceedings of the 21st Annual Meeting of the Special Interest Group on Discourse and Dialogue},
  month     = jul,
  year      = {2020},
  address   = {1st virtual meeting},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2020.sigdial-1.28/},
  doi       = {10.18653/v1/2020.sigdial-1.28},
  pages     = {225--235},
  abstract  = {It is important to define meaningful and interpretable automatic evaluation metrics for open-domain dialog research. Standard language generation metrics have been shown to be ineffective for dialog. This paper introduces the FED metric (fine-grained evaluation of dialog), an automatic evaluation metric which uses DialoGPT, without any fine-tuning or supervision. It also introduces the FED dataset which is constructed by annotating a set of human-system and human-human conversations with eighteen fine-grained dialog qualities. The FED metric (1) does not rely on a ground-truth response, (2) does not require training data and (3) measures fine-grained dialog qualities at both the turn and whole dialog levels. FED attains moderate to strong correlation with human judgement at both levels.},
}
Markdown (Informal)
[Unsupervised Evaluation of Interactive Dialog with DialoGPT](https://aclanthology.org/2020.sigdial-1.28/) (Mehri & Eskenazi, SIGDIAL 2020)
ACL