% Ajith, Singh, and Pruthi (2024), published in Findings of EMNLP 2024.
% The url field holds the permanent ACL Anthology address (same paper ID as
% the DOI) rather than a temporary preview-build link.
@inproceedings{ajith-etal-2024-downstream,
  title     = {Downstream Trade-offs of a Family of Text Watermarks},
  author    = {Ajith, Anirudh and
               Singh, Sameer and
               Pruthi, Danish},
  editor    = {Al-Onaizan, Yaser and
               Bansal, Mohit and
               Chen, Yun-Nung},
  booktitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2024},
  month     = nov,
  year      = {2024},
  address   = {Miami, Florida, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.findings-emnlp.821/},
  doi       = {10.18653/v1/2024.findings-emnlp.821},
  pages     = {14039--14053},
  abstract  = {Watermarking involves implanting an imperceptible signal into generated text that can later be detected via statistical tests. A prominent family of watermarking strategies for LLMs embeds this signal by upsampling a (pseudorandomly-chosen) subset of tokens at every generation step. However, such signals alter the model's output distribution and can have unintended effects on its downstream performance. In this work, we evaluate the performance of LLMs watermarked using three different strategies over a diverse suite of tasks including those cast as k-class classification (CLS), multiple choice question answering (MCQ), short-form generation (e.g., open-ended question answering) and long-form generation (e.g., translation) tasks. We find that watermarks (under realistic hyperparameters) can cause significant drops in LLMs' effective utility across all tasks. We observe drops of 10 to 20{\%} in CLS tasks in the average case, which shoot up to 100{\%} in the worst case. We notice degradations of about 7{\%} in MCQ tasks, 10-15{\%} in short-form generation, and 5-15{\%} in long-form generation tasks. Our findings highlight the trade-offs that users should be cognizant of when using watermarked models.},
}
Markdown (Informal)
[Downstream Trade-offs of a Family of Text Watermarks](https://aclanthology.org/2024.findings-emnlp.821/) (Ajith et al., Findings 2024)
ACL
- Anirudh Ajith, Sameer Singh, and Danish Pruthi. 2024. Downstream Trade-offs of a Family of Text Watermarks. In Findings of the Association for Computational Linguistics: EMNLP 2024, pages 14039–14053, Miami, Florida, USA. Association for Computational Linguistics.