% Conference paper record: TRM-Planner, Findings of ACL 2026 (pages 27058--27070).
% NOTE(review): the url field below targets a "preview ... ingest-acl" address of the
% ACL Anthology, presumably a staging build -- confirm, and swap in the permanent
% link once it is available.
@inproceedings{baek-etal-2026-trm,
  author    = {Baek, Euijin and
               Babiker, Housam and
               Kim, Mi-Young and
               Goebel, Randy},
  title     = {{TRM}-Planner: Offline Target Planning and Distillation for Tiny Recursive Models},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026},
  year      = {2026},
  month     = jul,
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  pages     = {27058--27070},
  isbn      = {979-8-89176-395-1},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1350/},
  abstract  = {Tiny Recursive Models (TRMs) perform iterative reasoning with an Adaptive Computation Time (ACT)-style loop, but their supervised training targets can be brittle, and their halting behavior can be difficult to tune. We introduce TRM-Planner, a two-stage teacher-cache distillation recipe that shifts compute to an offline teacher-cache stage. A frozen TRM checkpoint is unrolled for multiple refinement steps and stochastic rollouts; for each instance, we cache a small set of teacher entries (tokens, logits, step index, and quality metadata). A student TRM is then trained with the standard TRM objective plus a distillation loss computed from cached entries. Across Sudoku-Extreme and ARC-AGI-1/2, TRM-Planner shows an improvement over our reproduced TRM baseline while leaving student-time inference unchanged. On ARC1/ARC2 with 7M parameters, the two-attempt accuracy (pass@2) increases from 43.1{\%} to 48.1{\%} and 6.7{\%} to 9.2{\%}, respectively.},
}
Markdown (Informal)
[TRM-Planner: Offline Target Planning and Distillation for Tiny Recursive Models](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1350/) (Baek et al., Findings 2026)
ACL