@inproceedings{zhang-wan-2026-rst,
title = "{RST}-Guarder: Enhancing Long-Context Robustness for Safeguards via {RST} Parsing and Probabilistic Inference",
author = "Zhang, Xu and
Wan, Xiaojun",
editor = "Liakata, Maria and
Moreira, Viviane P. and
Zhang, Jiajun and
Jurgens, David",
booktitle = "Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingest-acl/2026.acl-long.1025/",
pages = "22397--22413",
ISBN = "979-8-89176-390-6",
abstract = "As large language models (LLMs) demonstrate remarkable capabilities across a wide range of tasks, ensuring the safety of their outputs is increasingly critical. To mitigate the risk of policy-violating responses, numerous guardrail models have been developed for harmful-content detection. While effective on short outputs, existing guardrails degrade on long-form responses, reflecting limited semantic understanding and weak robustness to contextual noise. To address these limitations, we propose RST-Guarder, an inference-time method that improves harmful-content detection for long-form inputs without additional data curation or model training. RST-Guarder first applies a RST parser to long-form inputs to get discourse-level semantic relations among segments, and subsequently performs hierarchical probabilistic inference to aggregate segment-level safety scores produced by pre-trained guardrail models. We evaluate RST-Guarder across multiple benchmarks and a diverse set of widely used guardrail models. Experimental results demonstrate that RST-Guarder consistently improves harmful-content detection on long-form inputs, while significantly reducing false positives that incorrectly classify benign content as harmful."
}Markdown (Informal)
[RST-Guarder: Enhancing Long-Context Robustness for Safeguards via RST Parsing and Probabilistic Inference](https://preview.aclanthology.org/ingest-acl/2026.acl-long.1025/) (Zhang & Wan, ACL 2026)
ACL