% Conference paper entry: Badhe and Shah, "Prompt-Level Distillation" (ACL 2026).
% NOTE(review): the url below points at the preview.aclanthology.org/ingest-acl host,
% presumably a staging link -- confirm the canonical Anthology URL before release.
@inproceedings{badhe-shah-2026-prompt,
  title     = {Prompt-Level Distillation: A Non-Parametric Alternative to Model Fine-Tuning for Efficient Reasoning},
  author    = {Badhe, Sanket and Shah, Deep},
  editor    = {Li, Yunyao and Rehm, Georg and Tu, Mei},
  booktitle = {Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics ({ACL} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-industry.142/},
  pages     = {2131--2147},
  isbn      = {979-8-89176-394-4},
  abstract  = {Advanced reasoning typically requires Chain-of-Thought prompting, which is accurate but incurs prohibitive latency and substantial test-time inference costs. The standard alternative, fine-tuning smaller models, often sacrifices interpretability while introducing significant resource and operational overhead. To address these limitations, we introduce Prompt-Level Distillation (PLD). We extract explicit reasoning patterns from a Teacher model and organize them into a structured list of expressive instructions for the Student model{'}s System Prompt. Evaluated using Gemma-3 4B, PLD improved Macro F1 scores on StereoSet (57{\%} to 90.0{\%}) and Contract-NLI (67{\%} to 83{\%}), while increasing LogiQA accuracy to 70{\%}. Similar results on Mistral Small 3.1 demonstrate cross-architecture generalizability, enabling these compact models to match frontier performance with negligible latency overhead. These expressive instructions render the decision-making process transparent, allowing for full human verification of logic, making this approach ideal for regulated industries such as law, finance, and content moderation, as well as high-volume use cases and edge devices.},
}
Markdown (Informal)
[Prompt-Level Distillation: A Non-Parametric Alternative to Model Fine-Tuning for Efficient Reasoning](https://preview.aclanthology.org/ingest-acl/2026.acl-industry.142/) (Badhe & Shah, ACL 2026)
ACL