@comment{
  Citation record for the MADRA paper in Findings of ACL 2026.
  NOTE(review): url points at a preview.aclanthology.org ingest address, which
  looks like a staging build; confirm and replace with the canonical
  permanent URL or a DOI once one is available.
  NOTE(review): address carries the conference venue as given in the source
  record; confirm whether the target style expects the publisher city instead.
}
@inproceedings{wang-etal-2026-madra,
  title     = {{MADRA}: Multi-Agent Debate for Risk-Aware Embodied Planning},
  author    = {Wang, JunJian and
               Zhao, Lidan and
               Zhang, Xi Sheryl},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.340/},
  pages     = {6852--6876},
  isbn      = {979-8-89176-395-1},
  abstract  = {Large Language Models (LLMs) exhibit impressive reasoning capabilities but often suffer from Embodied Semantic Hallucinations{---}generating plans that are semantically fluent but physically unsafe due to a lack of grounded common sense. Existing safety alignment methods, such as RLHF or naive safety prompting, typically fall into a Safety-Utility Trade-off, resulting in severe over-rejection of benign household instructions. To address this, we propose MADRA (Multi-Agent Debate for Risk Awareness), a training-free cognitive architecture that mimics System-2 deliberation. MADRA introduces a meta-cognitive Critical Agent that evaluates peer debates using a structured argumentation framework derived from the Toulmin Model, effectively mitigating the ``herd mentality'' in multi-agent systems. We also introduce SafeAware-VH, a benchmark featuring adversarial safe instructions designed to probe agents' sensitivity to physical risks. Extensive experiments demonstrate that MADRA breaks the Pareto frontier, achieving over 90{\%} rejection of unsafe tasks while maintaining high utility, significantly outperforming standard Chain-of-Thought and single-agent reflection baselines.},
}

Markdown (Informal)
[MADRA: Multi-Agent Debate for Risk-Aware Embodied Planning](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.340/) (Wang et al., Findings 2026)
ACL