% Workshop paper "SAGE: Steering Dialog Generation with Future-Aware
% State-Action Augmentation" by Zhang and Jaitly (2025).
% Acronyms (SAGE, NLP) are brace-protected so styles that recase titles keep
% their capitalisation. The month uses the bare three-letter macro.
% NOTE(review): the url below points at a preview/ingest host; confirm whether
% the canonical Anthology link should replace it once the volume is published.
% NOTE(review): address holds the event location as given in the record;
% confirm against the target style if a publisher city is expected instead.
@inproceedings{zhang-jaitly-2025-sage,
    title     = {{SAGE}: Steering Dialog Generation with Future-Aware State-Action Augmentation},
    author    = {Zhang, Yizhe and Jaitly, Navdeep},
    editor    = {Abercrombie, Gavin and
                 Basile, Valerio and
                 Frenda, Simona and
                 Tonelli, Sara and
                 Dudy, Shiran},
    booktitle = {Proceedings of the 4th Workshop on Perspectivist Approaches to {NLP}},
    month     = nov,
    year      = {2025},
    address   = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url       = {https://preview.aclanthology.org/ingest-emnlp/2025.nlperspectives-1.11/},
    pages     = {123--132},
    isbn      = {979-8-89176-350-0},
    abstract  = {Recent advances in large language models have enabled impressive task-oriented applications, yet building emotionally intelligent chatbots for natural, strategic conversations remains challenging. Current approaches often assume a single ``ground truth'' for emotional responses, overlooking the subjectivity of human emotion. We present a novel perspectivist approach, SAGE, that models multiple perspectives in dialogue generation using latent variables. At its core is the State-Action Chain (SAC), which augments standard fine-tuning with latent variables capturing diverse emotional states and conversational strategies between turns, in a future-looking manner. During inference, these variables are generated before each response, enabling multi-perspective control while preserving natural interactions. We also introduce a self-improvement pipeline combining dialogue tree search, LLM-based reward modeling, and targeted fine-tuning to optimize conversational trajectories. Experiments show improved LLM-based judgments while maintaining strong general LLM performance. The discrete latent variables further enable search-based strategies and open avenues for state-level reinforcement learning in dialogue systems, where learning can occur at the state level rather than the token level.},
}
Markdown (Informal)
[SAGE: Steering Dialog Generation with Future-Aware State-Action Augmentation](https://preview.aclanthology.org/ingest-emnlp/2025.nlperspectives-1.11/) (Zhang & Jaitly, NLPerspectives 2025)
ACL