@inproceedings{wadhwa-etal-2024-investigating,
title = "Investigating Mysteries of {C}o{T}-Augmented Distillation",
author = "Wadhwa, Somin and
Amir, Silvio and
Wallace, Byron C",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2024.emnlp-main.349/",
doi = "10.18653/v1/2024.emnlp-main.349",
pages = "6071--6086",
abstract = "Eliciting chain of thought (CoT) rationales - sequences of token that convey a ``reasoning'' process has been shown to consistently improve LLM performance on tasks like question answering. More recent efforts have shown that such rationales can also be used for model distillation: Including CoT sequences (elicited from a large ``teacher'' model) in addition to target labels when fine-tuning a small student model yields (often substantial) improvements. In this work we ask: Why and how does this additional training signal help in model distillation? We perform ablations to interrogate this, and report some potentially surprising results. Specifically: (1) Placing CoT sequences after labels (rather than before) realizes consistently better downstream performance {--} this means that no student ``reasoning'' is necessary at test time to realize gains. (2) When rationales are appended in this way, they need not be coherent reasoning sequences to yield improvements; performance increases are robust to permutations of CoT tokens, for example. In fact, (3) a small number of key tokens are sufficient to achieve improvements equivalent to those observed when full rationales are used in model distillation."
}
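
For intuition, here is a minimal Python sketch of the target ordering the abstract describes (the teacher's CoT rationale appended after the label when fine-tuning the student). The separator tokens, field names, and example instance are illustrative assumptions, not the authors' implementation.

```python
# A minimal sketch (not from the paper) of label-then-rationale targets for
# CoT-augmented distillation; separators and the example are assumptions.

def build_target(label: str, rationale: str, cot_position: str = "post") -> str:
    """Format a fine-tuning target for the small student model.

    cot_position="post" appends the teacher's CoT rationale after the label
    (the ordering the abstract reports works best, so no student "reasoning"
    is needed at test time); "pre" places the rationale before the label.
    """
    if cot_position == "post":
        return f"{label} [RATIONALE] {rationale}"
    return f"[RATIONALE] {rationale} [LABEL] {label}"


# Illustrative distilled instance: the label comes from the task; the
# rationale would be elicited from a large "teacher" model.
example = {
    "input": "Question: Can a sunflower grow at night? Answer yes or no.",
    "label": "no",
    "rationale": "Sunflowers need sunlight for photosynthesis, which is absent at night.",
}

print(build_target(example["label"], example["rationale"], cot_position="post"))
# -> no [RATIONALE] Sunflowers need sunlight for photosynthesis, which is absent at night.
```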