@comment{
  ACL Anthology record 2025.findings-acl.426 (Findings of ACL 2025).
  The url field holds the permanent Anthology landing page rather than a
  staging/preview host. NOTE(review): no doi field was present in the
  exported record; add one if it has been assigned.
}
@inproceedings{storai-etal-2025-smarter,
  author    = {Stora{\"\i}, Romain and
               Lee, Jaeseong and
               Hwang, Seung-won},
  title     = {Smarter, Not Harder: Training-Free Adaptive Computation for Transformers},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2025},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  pages     = {8147--8155},
  isbn      = {979-8-89176-256-5},
  url       = {https://aclanthology.org/2025.findings-acl.426/},
  abstract  = {Adaptive Computation in Transformers (ACT) has been pursued in two directions: efficiency- and performance-focused. We study performance-focused ACT, or PACT, which invests more computation on hard steps to improve performance, such as by adding forward passes. We first discuss beam search and hesitation-based methods as PACT and their limitations. While the hesitation-based approach outperforms beam search by perturbing input embeddings, it suffers from inefficiency due to invalidating KVCache and exhibits instability due to its reliance on randomness. To address this, we propose IMPACT, a novel PACT method that perturbs network weights rather than input embeddings. This approach enables the reuse of KVCache, offers deterministic predictions, and significantly improves memory and computational efficiency. By achieving a better balance between performance and efficiency, IMPACT makes PACT accessible to communities with consumer-grade hardware.},
}
Markdown (Informal)
[Smarter, Not Harder: Training-Free Adaptive Computation for Transformers](https://aclanthology.org/2025.findings-acl.426/) (Storaï et al., Findings 2025)
ACL