@inproceedings{olpadkar-etal-2025-llms,
title = "Can {LLM}s Be Efficient Predictors of Conversational Derailment?",
author = "Olpadkar, Kaustubh and
Bajaj, Vikram Sunil and
Barrett, Leslie",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingest-luhme/2025.findings-emnlp.816/",
doi = "10.18653/v1/2025.findings-emnlp.816",
pages = "15104--15112",
ISBN = "979-8-89176-335-7",
abstract = "Conversational derailment {---} when online discussions stray from their intended topics due to toxic or inappropriate remarks {---} is a common issue on online platforms. These derailments can have negative impacts on users and the online community. While previous work has focused on post hoc identification of toxic content, recent efforts emphasize proactive prediction of derailments before they occur, enabling early moderation. However, forecasting derailment is difficult due to the context-dependent emergence of toxicity and the need for timely alerts. We prompt pre-trained large language models (LLMs) to predict conversational derailment without task-specific fine-tuning. We compare a range of prompting strategies, including chain-of-thought reasoning (CoT) and few-shot exemplars, across small and large scale models, and evaluate their performance and inference-cost trade-offs on derailment benchmarks. Our experiments show that the best prompting configuration attains state-of-the-art performance, and forecasts derailments earlier than existing approaches. These results demonstrate that LLMs, even without fine-tuning, can serve as an effective tool for proactive conversational moderation."
}