% COLING 2025 main-conference paper on model-based causal reinforcement learning for dialogue policy.
% Editor names use the "Last, First" form so the compound surname "Di Eugenio" is kept whole.
@inproceedings{xu-etal-2025-efficient,
  title     = {An Efficient Dialogue Policy Agent with Model-Based Causal Reinforcement Learning},
  author    = {Xu, Kai and
               Wang, Zhenyu and
               Zhao, Yangyang and
               Fang, Bopeng},
  editor    = {Rambow, Owen and
               Wanner, Leo and
               Apidianaki, Marianna and
               Al-Khalifa, Hend and
               Di Eugenio, Barbara and
               Schockaert, Steven},
  booktitle = {Proceedings of the 31st International Conference on Computational Linguistics},
  month     = jan,
  year      = {2025},
  address   = {Abu Dhabi, UAE},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.coling-main.490/},
  pages     = {7331--7343},
  abstract  = {Dialogue policy trains an agent to select dialogue actions frequently implemented via deep reinforcement learning (DRL). The model-based reinforcement methods built a world model to generate simulated data to alleviate the sample inefficiency. However, traditional world model methods merely consider one-step dialogues, leading to an inaccurate environmental simulation. Furthermore, different users may have different intention preferences, while most existing studies lack consideration of the intention-preferences causal relationship. This paper proposes a novel framework for dialogue policy learning named MCA, implemented through model-based reinforcement learning with automatically constructed causal chains. The MCA model utilizes an autoregressive Transformer to model dialogue trajectories, enabling a more accurate simulation of the environment. Additionally, it constructs a causal chains module that outputs latent preference distributions for intention-action pairs, thereby elucidating the relationship between user intentions and agent actions. The experimental results show that MCA can achieve state-of-the-art performances on three dialogue datasets over the compared dialogue agents, highlighting its effectiveness and robustness.},
}
Markdown (Informal)
[An Efficient Dialogue Policy Agent with Model-Based Causal Reinforcement Learning](https://aclanthology.org/2025.coling-main.490/) (Xu et al., COLING 2025)
ACL