@inproceedings{wang-etal-2025-unleashing,
title = "Unleashing the Reasoning Potential of {LLM}s by Critique Fine-Tuning on One Problem",
author = "Wang, Yubo and
Nie, Ping and
Zou, Kai and
Wu, Lijun and
Chen, Wenhu",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingest-emnlp/2025.emnlp-main.149/",
pages = "3017--3027",
ISBN = "979-8-89176-332-6",
abstract = "Critique Fine-Tuning (CFT) has recently emerged as a promising paradigm for unlocking the reasoning capabilities of large language models (LLMs). In this work, we introduce one-shot CFT, a highly compute-efficient approach that leverages critique data generated from a single math problem. Remarkably, this method yields significant gains in reasoning accuracy, surpassing one-shot RLVR (Reinforcement Learning with Verifiable Reward) while requiring 15 to 20 times less compute. Given one math problem, we first prompt a set of diverse small models to produce candidate solutions, then use frontier models such as GPT-4.1 to generate high-quality critiques of these responses. We fine-tune Qwen and Llama family models ranging from 1.5B to 14B parameters with CFT. With just 5 GPU hours, our models achieve up to a 16 percent absolute improvement in average accuracy across six mathematical reasoning benchmarks (for example, Qwen2.5-Math-7B improves from 26 percent to 42 percent). Furthermore, ablation studies reveal the robustness of one-shot CFT across different prompt problems. Our findings suggest an extremely compute-efficient approach to unleash the reasoning potential of LLMs."
}
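The abstract describes a three-step data-generation recipe: sample candidate solutions to a single math problem from several diverse small models, have a frontier model (GPT-4.1) critique each candidate, then fine-tune on the critiques. The Python below is a minimal illustrative sketch of what that pipeline could look like; the model choices, prompts, and helper names are assumptions, not the authors' released implementation.

```python
# Hypothetical sketch of the one-shot CFT data-generation pipeline.
# Model choices, prompts, and structure are illustrative assumptions,
# not the authors' released code.
from openai import OpenAI
from transformers import pipeline

PROBLEM = "..."  # the single seed math problem (left unspecified here)

# Assumed set of diverse small open models used to draft candidate solutions.
SMALL_MODELS = ["Qwen/Qwen2.5-0.5B-Instruct", "Qwen/Qwen2.5-Math-1.5B-Instruct"]

def sample_candidates(problem: str, n_per_model: int = 4) -> list[str]:
    """Step 1: sample diverse candidate solutions from each small model."""
    candidates = []
    for name in SMALL_MODELS:
        gen = pipeline("text-generation", model=name)
        for _ in range(n_per_model):
            out = gen(f"Solve step by step:\n{problem}",
                      max_new_tokens=512, do_sample=True, temperature=0.8)
            candidates.append(out[0]["generated_text"])
    return candidates

client = OpenAI()  # reads OPENAI_API_KEY from the environment

def critique(problem: str, solution: str) -> str:
    """Step 2: ask a frontier model (GPT-4.1, per the abstract) to critique a candidate."""
    resp = client.chat.completions.create(
        model="gpt-4.1",
        messages=[{"role": "user", "content":
                   "Critique the following solution: point out every error "
                   "and state whether the final answer is correct.\n\n"
                   f"Problem: {problem}\n\nSolution: {solution}"}],
    )
    return resp.choices[0].message.content

# Step 3: assemble (problem + candidate -> critique) pairs; supervised
# fine-tuning on these critique targets is the CFT step.
cft_data = [{"input": f"{PROBLEM}\n\nCandidate solution:\n{c}",
             "target": critique(PROBLEM, c)}
            for c in sample_candidates(PROBLEM)]
```

Note that the fine-tuning targets are the critiques, not the candidate solutions themselves; training the Qwen/Llama models on these pairs is what the entry refers to as Critique Fine-Tuning.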