% Huang et al. (2021), EANCS workshop paper; ACL Anthology ID 2021.eancs-1.1.
@inproceedings{huang-etal-2021-counterfactual,
  author    = {Huang, Yi
               and Feng, Junlan
               and Wu, Xiaoting
               and Du, Xiaoyu},
  title     = {Counterfactual Matters: Intrinsic Probing For Dialogue State Tracking},
  editor    = {Wei, Wei
               and Dai, Bo
               and Zhao, Tuo
               and Li, Lihong
               and Yang, Diyi
               and Chen, Yun-Nung
               and Boureau, Y-Lan
               and Celikyilmaz, Asli
               and Geramifard, Alborz
               and Ahuja, Aman
               and Jiang, Haoming},
  booktitle = {The First Workshop on Evaluations and Assessments of Neural Conversation Systems},
  publisher = {Association for Computational Linguistics},
  address   = {Online},
  month     = nov,
  year      = {2021},
  pages     = {1--6},
  doi       = {10.18653/v1/2021.eancs-1.1},
  url       = {https://aclanthology.org/2021.eancs-1.1},
  abstract  = {A Dialogue State Tracker (DST) is a core component of modular task-oriented dialogue systems. Tremendous research progress has been made in past ten years to improve performance of DSTs especially on benchmark datasets. However, their generalization to novel and realistic scenarios beyond the held-out conversations is limited. In this paper, we design experimental studies to answer: 1) How does the distribution of dialogue data affect the performance of DSTs? 2) What are effective ways to probe counterfactual matter for DSTs? Our findings are: the performance variance of generative DSTs is not only due to the model structure itself, but can be attributed to the distribution of cross-domain values. Evaluating iconic generative DST models on MultiWOZ dataset with counterfactuals results in a significant performance drop of up to 34.64{\%} (from 50.91{\%} to 16.27{\%}) in absolute joint goal accuracy. It is believed that our experimental results can guide the future work to better understand the intrinsic core of DST and rethink the suitable way for specific tasks given the application property.},
}
Markdown (Informal)
[Counterfactual Matters: Intrinsic Probing For Dialogue State Tracking](https://aclanthology.org/2021.eancs-1.1) (Huang et al., EANCS 2021)
ACL