@inproceedings{vath-vu-2019-combine,
title = "To Combine or Not To Combine? A Rainbow Deep Reinforcement Learning Agent for Dialog Policies",
author = {V{\"a}th, Dirk and
Vu, Ngoc Thang},
editor = "Nakamura, Satoshi and
Gasic, Milica and
Zukerman, Ingrid and
Skantze, Gabriel and
Nakano, Mikio and
Papangelis, Alexandros and
Ultes, Stefan and
Yoshino, Koichiro",
booktitle = "Proceedings of the 20th Annual SIGdial Meeting on Discourse and Dialogue",
month = sep,
year = "2019",
address = "Stockholm, Sweden",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/W19-5908/",
doi = "10.18653/v1/W19-5908",
pages = "62--67",
abstract = "In this paper, we explore state-of-the-art deep reinforcement learning methods for dialog policy training such as prioritized experience replay, double deep Q-Networks, dueling network architectures and distributional learning. Our main findings show that each individual method improves the rewards and the task success rate but combining these methods in a Rainbow agent, which performs best across tasks and environments, is a non-trivial task. We, therefore, provide insights about the influence of each method on the combination and how to combine them to form a Rainbow agent."
}
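
The abstract names the value-based extensions the paper combines into a Rainbow agent. As a minimal sketch only (not the authors' implementation; `DuelingQNet`, `double_dqn_target`, and all layer sizes are hypothetical), here are two of those ingredients in PyTorch: a dueling network head and the double-DQN bootstrap target.

```python
# Illustrative sketch of two Rainbow ingredients cited in the abstract:
# a dueling Q-network and the double-DQN target. Not the paper's code;
# class/function names and dimensions are assumptions for illustration.
import torch
import torch.nn as nn

class DuelingQNet(nn.Module):
    """Dueling architecture: shared trunk, separate value/advantage streams."""
    def __init__(self, state_dim: int, n_actions: int, hidden: int = 128):
        super().__init__()
        self.trunk = nn.Sequential(nn.Linear(state_dim, hidden), nn.ReLU())
        self.value = nn.Linear(hidden, 1)              # V(s)
        self.advantage = nn.Linear(hidden, n_actions)  # A(s, a)

    def forward(self, state: torch.Tensor) -> torch.Tensor:
        h = self.trunk(state)
        v, a = self.value(h), self.advantage(h)
        # Q(s,a) = V(s) + A(s,a) - mean_a' A(s,a'): the mean-subtraction
        # makes the value/advantage decomposition identifiable.
        return v + a - a.mean(dim=-1, keepdim=True)

def double_dqn_target(online: nn.Module, target: nn.Module,
                      reward: torch.Tensor, next_state: torch.Tensor,
                      done: torch.Tensor, gamma: float = 0.99) -> torch.Tensor:
    """Double DQN: the online net selects the next action, the target net
    evaluates it, which reduces the overestimation bias of vanilla DQN."""
    with torch.no_grad():
        best_action = online(next_state).argmax(dim=-1, keepdim=True)
        next_q = target(next_state).gather(-1, best_action).squeeze(-1)
        return reward + gamma * (1.0 - done) * next_q
```

Prioritized experience replay and distributional learning, the other two methods the abstract lists, would change the replay-buffer sampling weights and the network's output (a return distribution instead of a scalar Q-value), respectively.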