% LightReasoner, Wang et al., ACL 2026 (Volume 1: Long Papers).
% NOTE(review): the url field points at a preview.aclanthology.org ingest-branch
% address; confirm whether the canonical Anthology address should replace it.
% NOTE(review): the address field holds the conference location, as exported;
% verify against the conventions of the target bibliography style.
@inproceedings{wang-etal-2026-lightreasoner,
  title     = {{LightReasoner}: Can Small Language Models Teach Large Language Models Reasoning?},
  author    = {Wang, Jingyuan and
               Chen, Yankai and
               Li, Zhonghang and
               Huang, Chao},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-long.122/},
  pages     = {2635--2663},
  isbn      = {979-8-89176-390-6},
  abstract  = {Large language models (LLMs) have demonstrated remarkable progress in reasoning, often through supervised fine-tuning (SFT). However, SFT is resource-intensive, relying on large curated datasets, rejection-sampled demonstrations, and uniform optimization across all tokens{---}even though only a fraction carry meaningful learning value. In this work, we explore a counterintuitive idea: can smaller language models (SLMs) teach larger language models (LLMs) by revealing high-value reasoning moments that reflect the latter{'}s unique strength? We propose LightReasoner, a novel framework that leverages the behavioral divergence between a stronger expert model (LLM) and a weaker amateur model (SLM). LightReasoner operates in two stages: (1) a sampling stage that pinpoints critical reasoning moments and constructs supervision examples capturing the expert{'}s advantage through expert{--}amateur contrast, and (2) a fine-tuning stage that aligns the expert model with these distilled examples, amplifying its reasoning strengths. Across seven benchmarks, LightReasoner improves accuracy by up to 28.1{\%}, while reducing time consumption by 90{\%}, sampled problems by 80{\%}, and tuned token usage by 99{\%}, all without relying on ground-truth labels. By turning weaker SLMs into effective teaching signals, LightReasoner offers a scalable and resource-efficient approach for advancing LLM reasoning.},
}
Markdown (Informal)
[LightReasoner: Can Small Language Models Teach Large Language Models Reasoning?](https://preview.aclanthology.org/ingest-acl/2026.acl-long.122/) (Wang et al., ACL 2026)
ACL