% Conference paper in Findings of ACL 2026: Self-EmoQ, emotion planning for streaming emotional TTS.
% NOTE(review): the url field points at the Anthology preview/ingest host, which may not be
% permanent -- confirm the canonical URL (and add a doi if one exists) before relying on it.
@inproceedings{zhao-etal-2026-self,
  title     = {{Self-EmoQ}: {Plutchik}-Guided Value-based Planning to Drive Streaming Emotional {TTS}},
  author    = {Zhao, Yue and
               Li, Hongyan and
               Chen, Yong and
               Ji, Luo},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.740/},
  pages     = {15038--15055},
  isbn      = {979-8-89176-395-1},
  abstract  = {Emotional interaction is increasingly crucial for conversational AI, yet current systems lack a self-emotion determination mechanism to drive the streaming text-to-speech (TTS) synthesis. We propose an emotion-planning framework that determines the emotion prior to the textual generation, grounding the downstream emotional TTS in a streaming manner. The framework is implemented by a plug-and-play LLM module, initialized from pretrained LLMs, and trained by reinforcement learning (RL) with emotions as the actions. A hybrid reward is employed which combines imitation signals with theory-driven scoring, in which the theory of Plutchik's wheel of emotions is adopted. By experiments on DailyDialog, EmoryNLP, IEMOCAP, and MELD, our method outperforms prompting and finetuning baselines on both emotion determination and response quality. We finally implement an entire streaming pipeline for real-time deployment, with the speech quality confirming the framework's emotional alignment, contextual coherence, and expressive fluency. Codes, cases, and demos are available in https://sixingdeguo.github.io/EmoQ-page/.},
}
Markdown (Informal)
[Self-EmoQ: Plutchik-Guided Value-based Planning to Drive Streaming Emotional TTS](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.740/) (Zhao et al., Findings 2026)
ACL