@inproceedings{ngong-etal-2025-differentially,
title = "Differentially Private Learning Needs Better Model Initialization and Self-Distillation",
author = "Ngong, Ivoline C. and
Near, Joseph and
Mireshghallah, Niloofar",
editor = "Chiruzzo, Luis and
Ritter, Alan and
Wang, Lu",
booktitle = "Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)",
month = apr,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/Ingest-2025-COMPUTEL/2025.naacl-long.455/",
pages = "9009--9027",
ISBN = "979-8-89176-189-6",
    abstract = "Differentially private SGD (DPSGD) enables privacy-preserving training of language models, but often reduces utility, diversity, and linguistic quality. We introduce DPRefine, a three-phase method that initializes a model using data synthesis from a small pre-trained LM with rigorous filtering, applies DP finetuning on private data, and performs self-distillation to refine outputs. This approach significantly outperforms vanilla DPSGD, with AlpacaEval preferring DPRefine's generations in 78.38{\%} of cases across all datasets and metrics, while also demonstrating substantial improvements in lexical diversity, achieving 85.31{\%} in MSTTR and 86.82{\%} in Jaccard similarity. Our fine-grained analysis reveals that DPRefine reduces linguistic errors in generated text by 84{\%}, mitigating grammar errors, spelling mistakes, and missing punctuation commonly associated with DPSGD. It also reduces inconsistencies present in non-private models, such as fabricated details and misattributed quotes. We find that small models like GPT-2 and T5 are effective for initialization and distillation, highlighting their potential in enabling scalable and efficient deployment of high-performing, privacy-preserving language models with improved linguistic quality and consistency."
}