@comment{
  Conference paper entry: RoboFailRing, ACL 2026 (Volume 1: Long Papers).
  NOTE(review): the url field points at the preview.aclanthology.org ingest
  host; confirm the final published URL before relying on it.
  NOTE(review): address holds the conference location as exported, not a
  publisher city.
}
@inproceedings{ying-etal-2026-robofailring,
  title     = {{RoboFailRing}: Retrieval-Augmented and Language Grounding Failure Detection for {VLM}-enabled Robotic Manipulation},
  author    = {Ying, Chenduo and
               Du, Linkang and
               Shu, Yuanchao and
               Cheng, Peng},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-long.602/},
  pages     = {13188--13202},
  isbn      = {979-8-89176-390-6},
  abstract  = {Reliable failure detection and causal reasoning are critical in robotic manipulation, as their absence risks robot damage and endangers human safety. Although recent Vision{--}Language Models (VLMs) are employed to attempt failure detection and causality reasoning, they typically make retrospective assessment only after task completion, and their reasoning accuracy is often limited. To address these issues, we introduce RoboFailRing, which enables timely failure detection during task execution and enhances the reasoning accuracy of VLMs. It achieves rapid failure detection by retrieving a pre-constructed failure memory and returning a similarity-based decision. In addition, by providing grounded failure report to VLMs, it improves the accuracy of their reasoning about the failure causes and repair strategies. We evaluate RoboFailRing on two large-scale simulated datasets comprising over 6,000 failure trajectories and covering 81 distinct manipulation tasks. The results show that the average success rate of out-of-distribution failure detection reaches 80{\%}, while the mean detection time is cut to roughly 50{\%} of the baseline. Moreover, evaluations on real-world systems show an average 35{\%} gain in VLM failure-reasoning accuracy. We make our code publicly available at: https://github.com/DynamicPoet/RoboFailRing.},
}
Markdown (Informal)
[RoboFailRing: Retrieval-Augmented and Language Grounding Failure Detection for VLM-enabled Robotic Manipulation](https://preview.aclanthology.org/ingest-acl/2026.acl-long.602/) (Ying et al., ACL 2026)
ACL