% Conference paper in the ACL 2026 proceedings, Volume 1 (Long Papers).
% NOTE(review): the url field below uses the preview.aclanthology.org/ingest-acl
% path, which looks like a pre-publication staging link. Confirm it and replace
% it with the permanent address (or add a doi) once the volume is published.
@inproceedings{ha-etal-2026-aha,
  author    = {Ha, Rui and Pu, Rui and Li, Chaozhuo and Sun, Li and Su, Sen},
  title     = {From ``Aha Moments'' to Controllable Thinking: Toward Meta-Cognitive Reasoning in {LRMs} via Decoupled Reasoning and Control},
  editor    = {Liakata, Maria and Moreira, Viviane P. and Zhang, Jiajun and Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  pages     = {6698--6710},
  isbn      = {979-8-89176-390-6},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-long.304/},
  abstract  = {Large Reasoning Models (LRMs) can exhibit step-by-step reasoning, reflection, and backtracking, but these behaviors are often unregulated, leading to overthinking. As a result, LRMs continue generating redundant reasoning even after reaching high-confidence conclusions. This increases inference cost and latency, limiting practical deployment. The root cause is the absence of an intrinsic mechanism to monitor the reasoning state and decide when to continue, backtrack, or stop. We propose MERA, a meta-cognitive reasoning framework that decouples reasoning from control to enable independent optimization of control strategies. MERA constructs high-quality reasoning{--}control supervision data via a takeover-based pipeline, and transforms long-horizon traces into structured reasoning{--}control alternating sequences for training. The model is trained with supervised fine-tuning to internalize the structured separation, and further optimized with Control-Segment Policy Optimization (CSPO), which combines segment-wise GRPO with control masking to focus learning on control segments. Experiments across reasoning benchmarks show that MERA improves both efficiency and accuracy.},
}
Markdown (Informal)
[From "Aha Moments" to Controllable Thinking: Toward Meta-Cognitive Reasoning in LRMs via Decoupled Reasoning and Control](https://preview.aclanthology.org/ingest-acl/2026.acl-long.304/) (Ha et al., ACL 2026)
ACL