@inproceedings{singh-2025-teacher,
title = "From Teacher to Student: Tracking Memorization Through Model Distillation",
author = "Singh, Simardeep",
editor = "Jia, Robin and
Wallace, Eric and
Huang, Yangsibo and
Pimentel, Tiago and
Maini, Pratyush and
Dankers, Verna and
Wei, Johnny and
Lesci, Pietro",
booktitle = "Proceedings of the First Workshop on Large Language Model Memorization (L2M2)",
month = aug,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/display_plenaries/2025.l2m2-1.6/",
pages = "78--82",
ISBN = "979-8-89176-278-7",
abstract = "Large language models (LLMs) are known to memorize parts of their training data, raising important concerns around privacy and security. While previous research has focused on studying memorization in pre-trained models, much less is known about how knowledge distillation (KD) affects memorization.In this study, we explore how different KD methods influence the memorization of fine-tuned task data when a large teacher model is distilled into smaller student variants.This study demonstrates that distilling a larger teacher model, fine-tuned on a dataset, into a smaller variant not only lowers computational costs and model size but also significantly reduces the memorization risks compared to standard fine-tuning approaches."
}