% ChatMatch (Yang et al.), ACL 2022, Volume 1: Long Papers.
% The url field uses the canonical ACL Anthology permalink rather than a preview/staging build link.
@inproceedings{yang-etal-2022-chatmatch,
  title     = {{ChatMatch}: Evaluating Chatbots by Autonomous Chat Tournaments},
  author    = {Yang, Ruolan and
               Li, Zitong and
               Tang, Haifeng and
               Zhu, Kenny},
  editor    = {Muresan, Smaranda and
               Nakov, Preslav and
               Villavicencio, Aline},
  booktitle = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = may,
  year      = {2022},
  address   = {Dublin, Ireland},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.acl-long.522/},
  doi       = {10.18653/v1/2022.acl-long.522},
  pages     = {7579--7590},
  abstract  = {Existing automatic evaluation systems of chatbots mostly rely on static chat scripts as ground truth, which is hard to obtain, and requires access to the models of the bots as a form of {\textquotedblleft}white-box testing{\textquotedblright}. Interactive evaluation mitigates this problem but requires human involvement. In our work, we propose an interactive chatbot evaluation framework in which chatbots compete with each other like in a sports tournament, using flexible scoring metrics. This framework can efficiently rank chatbots independently from their model architectures and the domains for which they are trained.},
}
Markdown (Informal)
[ChatMatch: Evaluating Chatbots by Autonomous Chat Tournaments](https://aclanthology.org/2022.acl-long.522/) (Yang et al., ACL 2022)
ACL
- Ruolan Yang, Zitong Li, Haifeng Tang, and Kenny Zhu. 2022. ChatMatch: Evaluating Chatbots by Autonomous Chat Tournaments. In Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 7579–7590, Dublin, Ireland. Association for Computational Linguistics.