@inproceedings{zhao-etal-2025-semantic,
    title     = {Semantic-Aware Action Space Compression via {LLM}-{DRL} Synergy for Efficient Task-oriented Dialogue Policy Exploration},
    author    = {Zhao, Yangyang and
                 Niu, Ben and
                 Tan, Yuxuan and
                 Wang, Shihan and
                 Qin, Libo},
    editor    = {Christodoulopoulos, Christos and
                 Chakraborty, Tanmoy and
                 Rose, Carolyn and
                 Peng, Violet},
    booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2025},
    month     = nov,
    year      = {2025},
    address   = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.findings-emnlp.968/},
    doi       = {10.18653/v1/2025.findings-emnlp.968},
    pages     = {17808--17820},
    isbn      = {979-8-89176-335-7},
    abstract  = {The flexibility of natural language significantly expands the action space in task-oriented dialogue systems, causing inefficient exploration and slow convergence in deep reinforcement learning (DRL)-based policy optimization. Pre-trained large language models (LLMs), with world knowledge and semantic understanding, offer promising solutions. To this end, we propose LLM-Guided DRL via Semantic-Aware Action Pruning (LLMSAP), a novel framework that synergizes pretrained LLMs with DRL. LLMSAP leverages the world knowledge and contextual understanding of LLMs to guide decision-making via an action feasibility assessment. Instead of requiring LLMs to directly generate optimal actions due to their limited precision in sequential decision tasks, LLMSAP employs a lightweight action pruning mechanism. Specifically, LLMs act as action filters, rapidly eliminating semantically implausible or low-potential actions from multi-turn dialogue context, allowing the DRL agent to focus exploration on a refined candidate subset. This two-stage framework ({``}prune-then-optimize'') avoids extensive LLM fine-tuning while preserving the decision-making precision of DRL. Experiments on multiple benchmarks verify the effectiveness of LLMSAP.}
}
@comment{Markdown (Informal):
[Semantic-Aware Action Space Compression via LLM-DRL Synergy for Efficient Task-oriented Dialogue Policy Exploration](https://aclanthology.org/2025.findings-emnlp.968/) (Zhao et al., Findings of EMNLP 2025)
ACL}