% Lee et al. (2022), ACL 2022 Volume 1 (Long Papers), pp. 8424--8445.
% The url field uses the stable aclanthology.org landing page rather than a
% preview.aclanthology.org build; the paper ID matches the one in the doi.
@inproceedings{lee-etal-2022-deduplicating,
  title     = {Deduplicating Training Data Makes Language Models Better},
  author    = {Lee, Katherine and Ippolito, Daphne and Nystrom, Andrew and Zhang, Chiyuan and Eck, Douglas and Callison-Burch, Chris and Carlini, Nicholas},
  editor    = {Muresan, Smaranda and Nakov, Preslav and Villavicencio, Aline},
  booktitle = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = may,
  year      = {2022},
  address   = {Dublin, Ireland},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.acl-long.577/},
  doi       = {10.18653/v1/2022.acl-long.577},
  pages     = {8424--8445},
}