@comment{ACL Anthology record 2025.findings-emnlp.70 (Findings of EMNLP 2025). The url field uses the permanent aclanthology.org address, not a preview build.}
@inproceedings{wang-etal-2025-flexible,
  title     = {Flexible Thinking for Multimodal Emotional Support Conversation via Reinforcement Learning},
  author    = {Wang, Fanfan and
               Shen, Xiangqing and
               Yu, Jianfei and
               Xia, Rui},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2025},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-emnlp.70/},
  doi       = {10.18653/v1/2025.findings-emnlp.70},
  pages     = {1341--1356},
  isbn      = {979-8-89176-335-7},
  abstract  = {Emotional Support Conversation (ESC) systems aim to alleviate user distress. However, current Chain-of-Thought based ESC methods often employ rigid, text-only reasoning, limiting adaptability in dynamic, multimodal interactions and introducing reasoning noise that degrades support quality. To address this, we introduce ``Flexible Thinking'' for multimodal ESC, enabling models to adaptively select contextually relevant thinking aspects: Visual Scene, Emotion, Situation, and Response Strategy. We first construct training data by manually curating flexible thinking demonstrations on the MESC dataset, then using a Multimodal Large Language Model to synthesize these processes for the full training set. Then, we propose FIRES, a framework integrating Supervised Fine-Tuning (SFT) for initial learning with Reinforcement Learning for refinement. This two-stage approach helps FIRES transcend SFT{'}s generalization limits and, crucially, directly links thinking processes to response quality via tailored rewards, moving beyond imitating potentially imperfect synthetic data. Experiments on MESC and EMOTyDA datasets demonstrate FIRES{'}s effectiveness and generalizability in fostering higher-quality emotional support responses through adaptive reasoning.},
}
Markdown (Informal)
[Flexible Thinking for Multimodal Emotional Support Conversation via Reinforcement Learning](https://aclanthology.org/2025.findings-emnlp.70/) (Wang et al., Findings 2025)
ACL