@comment{Findings of ACL 2026 paper on the Zero Token Transformer.
NOTE(review): the url field points at the preview.aclanthology.org ingest host;
confirm that this is the intended permanent link.}
@inproceedings{li-etal-2026-efficient-transformer,
  title     = {Efficient Transformer Parameter Reuse via Zero-Token Mechanism},
  author    = {Li, Guanghao and
               Jiang, Wenhao and
               Shen, Li and
               Tang, Ming and
               Yuan, Chun},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.711/},
  pages     = {14498--14515},
  isbn      = {979-8-89176-395-1},
  abstract  = {Resource constraints often limit the parameter capacity of Large Language Models (LLMs), thereby hindering their performance. Although existing approaches leverage parameter sharing to reuse a fixed set of parameters within constrained budgets, they typically require each layer to fulfill multiple roles over a fixed number of iterations. This design compromises both efficiency and adaptability. In this work, we propose the Zero Token Transformer (ZTT), which employs a head-tail decoupled parameter cycling strategy. Specifically, we decouple the first (head) and last (tail) layers from the parameter cycling process, enabling iterative refinement solely within the intermediate layers. Furthermore, we introduce a Zero-Token Mechanism, wherein a virtual token with a trainable key and a zero-valued vector functions as a standard token. The resulting attention scores not only reflect the computational significance of each layer but also facilitate dynamic early exiting, thereby preserving overall model accuracy. Our approach achieves superior performance under strict parameter constraints, substantially reduces computational overhead via early exits, and can be seamlessly integrated into the fine-tuning of existing pre-trained models, improving both efficiency and adaptability.},
}
Markdown (Informal)
[Efficient Transformer Parameter Reuse via Zero-Token Mechanism](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.711/) (Li et al., Findings 2026)
ACL