@inproceedings{zhou-etal-2019-building,
title = "Building Task-Oriented Visual Dialog Systems Through Alternative Optimization Between Dialog Policy and Language Generation",
author = "Zhou, Mingyang and
Arnold, Josh and
Yu, Zhou",
editor = "Inui, Kentaro and
Jiang, Jing and
Ng, Vincent and
Wan, Xiaojun",
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)",
month = nov,
year = "2019",
address = "Hong Kong, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/D19-1014/",
doi = "10.18653/v1/D19-1014",
pages = "143--153",
abstract = "Reinforcement learning (RL) is an effective approach to learn an optimal dialog policy for task-oriented visual dialog systems. A common practice is to apply RL on a neural sequence-to-sequence(seq2seq) framework with the action space being the output vocabulary in the decoder. However, it is difficult to design a reward function that can achieve a balance between learning an effective policy and generating a natural dialog response. This paper proposes a novel framework that alternatively trains a RL policy for image guessing and a supervised seq2seq model to improve dialog generation quality. We evaluate our framework on the GuessWhich task and the framework achieves the state-of-the-art performance in both task completion and dialog quality."
}
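The abstract describes an alternating optimization scheme: an RL phase that improves the dialog policy on the image-guessing task, and a supervised phase that keeps the seq2seq generator's responses natural. The toy Python sketch below only illustrates that alternating loop structure under assumed, simplified interfaces; the classes, reward, and update rules are hypothetical stand-ins, not the authors' implementation.

```python
import random

# Hypothetical sketch of alternating optimization between an RL dialog policy
# (rewarded for image guessing, as in GuessWhich) and supervised seq2seq
# training (to keep generated dialog natural). All names here are toy
# stand-ins for illustration only.

class ToyDialogPolicy:
    """Stands in for a seq2seq questioner; tracks two scalar 'skills'."""
    def __init__(self):
        self.task_skill = 0.0   # improved by the RL phase (guessing reward)
        self.fluency = 0.0      # improved by the supervised phase (likelihood)

    def rl_update(self, reward, lr=0.1):
        # Policy-gradient-style step driven by the episode reward.
        self.task_skill += lr * reward

    def supervised_update(self, lr=0.1):
        # Cross-entropy-style step on human dialog data (simulated here).
        self.fluency += lr * 1.0


def play_guessing_episode(policy):
    """Toy guessing episode: higher task skill means a correct guess is more likely."""
    success_prob = min(0.9, 0.5 + policy.task_skill)
    return 1.0 if random.random() < success_prob else -0.1


def alternating_training(policy, epochs=20):
    for _ in range(epochs):
        reward = play_guessing_episode(policy)  # RL phase: optimize task completion
        policy.rl_update(reward)
        policy.supervised_update()              # supervised phase: preserve dialog quality


if __name__ == "__main__":
    policy = ToyDialogPolicy()
    alternating_training(policy)
    print(f"task_skill={policy.task_skill:.2f}, fluency={policy.fluency:.2f}")
```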