% ESC-Judge paper, Proceedings of EMNLP 2025 (Anthology ID 2025.emnlp-main.811).
% NOTE(review): "address" appears to hold the conference venue rather than the
% publisher's city; kept as exported -- confirm against the target style.
@inproceedings{madani-srihari-2025-esc,
  title     = {{ESC-Judge}: A Framework for Comparing Emotional Support Conversational Agents},
  author    = {Madani, Navid and
               Srihari, Rohini},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.emnlp-main.811/},
  pages     = {16059--16076},
  isbn      = {979-8-89176-332-6},
  abstract  = {Large Language Models (LLMs) increasingly power mental-health chatbots, yet the field still lacks a scalable, theory-grounded way to decide which model is more effective to deploy. We present ESC-Judge, the first end-to-end evaluation framework that (i) grounds head-to-head comparison of Emotional-Support LLMs (ES-LLMs) in an established psychological theory{---}Clara Hill{'}s Exploration{--}Insight{--}Action (E-I-A) counselling model{---}thereby delivering a structured, interpretable lens on performance, and (ii) fully automates the pipeline at scale. ESC-Judge proceeds in three stages: (1) it synthesizes realistic help-seeker roles by sampling empirically salient attributes (stressors, personality, life history); (2) it has two candidate ES-Agents conduct separate sessions with the same role, isolating model-specific strategies; and (3) it asks a specialised judge LLM to issue pairwise preferences across rubric-anchored skills that exhaustively cover the E-I-A spectrum. In our empirical study, ESC-Judge matches PhD-level annotators in 85{\%} of Exploration, 83{\%} of Insight, and 86{\%} of Action decisions, demonstrating human-level reliability at a fraction of the cost. We release all code, prompts, synthetic roles, transcripts, and judgment scripts to catalyze transparent progress in emotionally supportive AI.},
}
Markdown (Informal)
[ESC-Judge: A Framework for Comparing Emotional Support Conversational Agents](https://aclanthology.org/2025.emnlp-main.811/) (Madani & Srihari, EMNLP 2025)
ACL