% Findings of ACL 2026 paper (ACL Anthology export). The url field holds the
% canonical Anthology address matching the Anthology ID found in the DOI.
@inproceedings{zuo-etal-2026-adaptive,
  title     = {Adaptive Test-Time Compute Allocation with Evolving In-Context Demonstrations},
  author    = {Zuo, Bowen and
               Zhou, Dongruo and
               Zhu, Yinglun},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.findings-acl.1754/},
  doi       = {10.18653/v1/2026.findings-acl.1754},
  pages     = {35156--35173},
  isbn      = {979-8-89176-395-1},
  abstract  = {While scaling test-time compute can substantially improve model performance, existing approaches either rely on static compute allocation or sample from fixed generation distributions. In this work, we introduce a test-time compute allocation framework that jointly adapts where computation is spent and how generation is performed. Our method begins with a warm-up phase that identifies easy queries and assembles an initial pool of question-response pairs from the test set itself. An adaptive phase then concentrates further computation on unresolved queries while reshaping their generation distributions through evolving in-context demonstrations{---}conditioning each generation on successful responses from semantically related queries rather than resampling from a fixed distribution. Experiments across math, coding, and reasoning benchmarks demonstrate that our approach consistently outperforms existing baselines while consuming substantially less inference-time compute.},
}
Markdown (Informal)
[Adaptive Test-Time Compute Allocation with Evolving In-Context Demonstrations](https://aclanthology.org/2026.findings-acl.1754/) (Zuo et al., Findings 2026)
ACL