@comment{
  Conference paper in the Findings of ACL 2026 volume (see booktitle and year).
  NOTE(review): the url field points at a preview/ingest host
  (preview.aclanthology.org/ingest-acl); confirm the permanent URL before
  relying on this entry.
}
@inproceedings{zhao-etal-2026-mark,
  title     = {The Mark Fades: Adaptive Evolutionary Paraphrase-based Attack against {LLM} Watermarks},
  author    = {Zhao, Yusheng and
               Zhao, Jian and
               Zhang, Tianle and
               Wei, Feng and
               Li, Xuelong},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.459/},
  pages     = {9428--9445},
  isbn      = {979-8-89176-395-1},
  abstract  = {While LLM watermarking is essential for machine-generated content identification, existing paraphrase-based attacks struggle to balance watermark removal efficacy with text quality. We propose TSAPA, a training-free evolutionary framework that models watermark removal as a constrained multi-objective optimization problem. By leveraging genetic algorithms to navigate the Pareto front, TSAPA utilizes a Pseudo-Log-Likelihood (PLL)-guided mutation to precisely target and modify watermark-carrying tokens. Experiments on Qwen3 series (1.7B/8B/32B) across multiple watermark schemes show that TSAPA achieves over 90{\%} attack success rate (ASR) while maintaining high text semantic fidelity, significantly outperforming baseline methods. This work exposes critical vulnerabilities in current watermarks and provides a new perspective for robust evaluation.},
}
Markdown (Informal)
[The Mark Fades: Adaptive Evolutionary Paraphrase-based Attack against LLM Watermarks](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.459/) (Zhao et al., Findings 2026)
ACL