@comment{gu-etal-2026-mash: ACL Anthology record 2026.findings-acl.1487.
  NOTE(review): url was normalised from the preview.aclanthology.org/ingest-acl/
  staging address to the canonical Anthology address; confirm that it resolves.
  The address field holds the conference location, as exported by the Anthology.}
@inproceedings{gu-etal-2026-mash,
  title     = {{MASH}: Evading Black-Box {AI}-Generated Text Detectors via Style Humanization},
  author    = {Gu, Yongtong and
               Li, Songze and
               Hu, Xia},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.findings-acl.1487/},
  pages     = {29749--29769},
  isbn      = {979-8-89176-395-1},
  abstract  = {The increasing misuse of AI-generated texts (AIGT) has motivated the rapid development of AIGT detection methods. However, the reliability of these detectors remains fragile against adversarial evasions. Existing attack strategies often rely on white-box assumptions or demand prohibitively high computational and interaction costs, rendering them ineffective under practical black-box scenarios. In this paper, we propose Multi-stage Alignment for Style Humanization (MASH), a novel framework that evades black-box detectors based on style transfer. MASH sequentially employs style-injection supervised fine-tuning, direct preference optimization, and inference-time refinement to shape the distributions of AI-generated texts to resemble those of human-written texts. Experiments across 6 datasets and 5 detectors demonstrate the superior performance of MASH over 11 baseline evaders. Specifically, MASH achieves an average Attack Success Rate (ASR) of 92{\%}, surpassing the strongest baselines by an average of 24{\%}, while maintaining superior linguistic quality.},
}
Markdown (Informal)
[MASH: Evading Black-Box AI-Generated Text Detectors via Style Humanization](https://aclanthology.org/2026.findings-acl.1487/) (Gu et al., Findings 2026)
ACL