@comment{
  ACL 2025 long paper, Anthology ID 2025.acl-long.145.
  The url field uses the canonical aclanthology.org address instead of the
  temporary preview/ingestion host the entry was originally exported from.
  Field values are brace-delimited; month uses the bare jul macro so the
  bibliography style controls how it is rendered.
}
@inproceedings{niess-kern-2025-ensemble,
  author    = {Niess, Georg and
               Kern, Roman},
  title     = {Ensemble Watermarks for Large Language Models},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  pages     = {2903--2916},
  isbn      = {979-8-89176-251-0},
  url       = {https://aclanthology.org/2025.acl-long.145/},
  abstract  = {As large language models (LLMs) reach human-like fluency, reliably distinguishing AI-generated text from human authorship becomes increasingly difficult. While watermarks already exist for LLMs, they often lack flexibility and struggle with attacks such as paraphrasing. To address these issues, we propose a multi-feature method for generating watermarks that combines multiple distinct watermark features into an ensemble watermark. Concretely, we combine acrostica and sensorimotor norms with the established red-green watermark to achieve a 98{\%} detection rate. After a paraphrasing attack, the performance remains high with 95{\%} detection rate. In comparison, the red-green feature alone as a baseline achieves a detection rate of 49{\%} after paraphrasing. The evaluation of all feature combinations reveals that the ensemble of all three consistently has the highest detection rate across several LLMs and watermark strength settings. Due to the flexibility of combining features in the ensemble, various requirements and trade-offs can be addressed. Additionally, the same detection function can be used without adaptations for all ensemble configurations. This method is particularly of interest to facilitate accountability and prevent societal harm.},
}
Markdown (Informal)
[Ensemble Watermarks for Large Language Models](https://aclanthology.org/2025.acl-long.145/) (Niess & Kern, ACL 2025)
ACL
- Georg Niess and Roman Kern. 2025. Ensemble Watermarks for Large Language Models. In Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 2903–2916, Vienna, Austria. Association for Computational Linguistics.