% ThinkPilot (Li et al.), Findings of EACL 2026; ACL Anthology ID 2026.findings-eacl.185.
% The url field holds the canonical Anthology permalink built from that ID.
% NOTE(review): address holds the conference location (Anthology convention), not the publisher's city.
@inproceedings{li-etal-2026-thinkpilot,
  title     = {{ThinkPilot}: Steering Reasoning Models via Automated Think-prefixes Optimization},
  author    = {Li, Sunzhu and
               Lin, Zhiyu and
               Zhao, Jiale and
               Yang, Shuling and
               Wei, Chen},
  editor    = {Demberg, Vera and
               Inui, Kentaro and
               Marquez, Llu{\'\i}s},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {EACL} 2026},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.findings-eacl.185/},
  pages     = {3573--3592},
  isbn      = {979-8-89176-386-9},
  abstract  = {Large Reasoning Models (LRMs) are powerful, but they still suffer from inefficient and off-target reasoning. Currently, training-free methods are limited to either rigid heuristics or descriptive, non-actionable analyses. In this paper, we introduce ThinkPilot, a training-free framework that automatically optimizes LRMs reasoning. It uses an evolutionary process to generate \textit{think-prefixes}, namely instructions that evolve driven by a taxonomy of \textit{reasoning behaviors} to guide models toward superior performance. Extensive experiments demonstrate ThinkPilot{'}s broad effectiveness: it significantly improves the accuracy-length trade-off for efficient reasoning, drastically improves safety (e.g., cutting the StrongREJECT score of DeepSeek-R1-Distill-Qwen-32B from 27.0{\%} to 0.7{\%}), and enhances instruction following. It also synergizes with existing training-based methods. Specially, our analysis reveals that think-prefixes can reliably control LRMs' reasoning behaviors, and that different tasks have strong preferences for specific behavioral distributions. By automatically identifying and eliciting these behaviors, ThinkPilot provides a generalizable framework for aligning LRMs reasoning with task demands.},
}
Markdown (Informal)
[ThinkPilot: Steering Reasoning Models via Automated Think-prefixes Optimization](https://aclanthology.org/2026.findings-eacl.185/) (Li et al., Findings 2026)
ACL