@inproceedings{le-jeune-etal-2025-realharm,
title = "{R}eal{H}arm: A Collection of Real-World Language Model Application Failures",
author = "Le Jeune, Pierre and
Liu, Jiaen and
Rossi, Luca and
Dora, Matteo",
editor = "Novikova, Jekaterina",
booktitle = "Proceedings of the The First Workshop on LLM Security (LLMSEC)",
month = aug,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/corrections-2025-08/2025.llmsec-1.7/",
pages = "87--100",
ISBN = "979-8-89176-279-4",
abstract = "Language model deployments in consumer-facing applications introduce numerous risks. While existing research on harms and hazards of such applications follows top-down approaches derived from regulatory frameworks and theoretical analyses, empirical evidence of real-world failure modes remains underexplored. In this work, we introduce RealHarm, a dataset of annotated problematic interactions with AI agents built from a systematic review of publicly reported incidents. Analyzing harms, causes, and hazards specifically from the deployer{'}s perspective, we find that reputational damage constitutes the predominant organizational harm, while misinformation emerges as the most common hazard category. We empirically evaluate state-of-the-art guardrails and content moderation systems to probe whether such systems would have prevented the incidents, revealing a significant gap in the protection of AI applications."
}