@inproceedings{jiang-2025-towards,
title = "Towards Human-Like Dialogue Systems: Integrating Multimodal Emotion Recognition and Non-Verbal Cue Generation",
author = "Jiang, Jingjing",
editor = "Whetten, Ryan and
Sucal, Virgile and
Ngo, Anh and
Chalamalasetti, Kranti and
Inoue, Koji and
Cimino, Gaetano and
Yang, Zachary and
Zenimoto, Yuki and
Rodriguez, Ricardo",
booktitle = "Proceedings of the 21st Workshop of Young Researchers' Roundtable on Spoken Dialogue Systems",
month = aug,
year = "2025",
address = "Avignon, France",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/corrections-2025-10/2025.yrrsds-1.6/",
pages = "15--17",
abstract = "This position paper outlines my research vision for developing human-like dialogue systems capable of both perceiving and expressing emotions through multimodal communication. My current research focuses on two main areas: multimodal emotion recognition and non-verbal cue generation. For emotion recognition, I constructed a Japanese multimodal dialogue dataset that captures natural, dyadic face-to-face interactions and developed an emotional valence recognition model that integrates textual, speech and physiological inputs. On the generation side, my research explores non-verbal cue generation for embodied conversational agents (ECAs). Finally, the paper discusses the future of SDSs, emphasizing the shift from traditional turn-based architectures to full-duplex, real-time, multimodal systems."
}
Markdown (Informal)
[Towards Human-Like Dialogue Systems: Integrating Multimodal Emotion Recognition and Non-Verbal Cue Generation](https://preview.aclanthology.org/corrections-2025-10/2025.yrrsds-1.6/) (Jiang, YRRSDS 2025)
ACL
Jingjing Jiang. 2025. Towards Human-Like Dialogue Systems: Integrating Multimodal Emotion Recognition and Non-Verbal Cue Generation. In *Proceedings of the 21st Workshop of Young Researchers' Roundtable on Spoken Dialogue Systems*, pages 15–17, Avignon, France. Association for Computational Linguistics.