@inproceedings{wadhwa-etal-2025-taught,
  title     = {Who Taught You That? Tracing Teachers in Model Distillation},
  author    = {Wadhwa, Somin and
               Shaib, Chantal and
               Amir, Silvio and
               Wallace, Byron C.},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2025},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-acl.173/},
  doi       = {10.18653/v1/2025.findings-acl.173},
  pages     = {3307--3315},
  isbn      = {979-8-89176-256-5},
  abstract  = {Model distillation {--} using outputs from a large teacher model to teach a small student model {--} is a practical means of creating efficient models for a particular task. We ask: Can we identify a students' teacher based on its outputs? Such ``footprints'' left by teacher LLMs would be interesting artifacts. Beyond this, reliable teacher inference may have practical implications as actors seek to distill specific capabilities of massive proprietary LLMs into deployed smaller LMs, potentially violating terms of service. We consider practical task distillation targets including summarization, question answering, and instruction-following. We assume a finite set of candidate teacher models, which we treat as blackboxes. We design discriminative models that operate over lexical features. We find that n-gram similarity alone is unreliable for identifying teachers, but part-of-speech (PoS) templates preferred by student models mimic those of their teachers.},
}
Markdown (Informal)
[Who Taught You That? Tracing Teachers in Model Distillation](https://aclanthology.org/2025.findings-acl.173/) (Wadhwa et al., Findings 2025)
ACL