@inproceedings{heo-etal-2025-ringformer,
title = "{R}ing{F}ormer: Rethinking Recurrent Transformer with Adaptive Level Signals",
author = "Heo, Jaemu and
Fozilov, Eldor and
Song, Hyunmin and
Kim, Taehwan",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/author-page-yu-wang-polytechnic/2025.findings-emnlp.1182/",
doi = "10.18653/v1/2025.findings-emnlp.1182",
pages = "21675--21686",
ISBN = "979-8-89176-335-7",
abstract = "Transformers have achieved great success in effectively processing sequential data such as text. Their architecture consisting of several attention and feedforward blocks can model relations between elements of a sequence in parallel manner, which makes them very efficient to train and effective in sequence modeling. Even though they have shown strong performance in processing sequential data, the size of their parameters is considerably larger when compared to other architectures such as RNN and CNN based models. Therefore, several approaches have explored parameter sharing and recurrence in Transformer models to address their computational demands. However, such methods struggle to maintain high performance compared to the original transformer model. To address this challenge, we propose our novel approach, RingFormer, which employs one Transformer layer that processes input repeatedly in a circular, ring-like manner, while utilizing low-rank matrices to generate input-dependent level signals. This allows us to reduce the model parameters substantially while maintaining high performance in a variety of tasks such as translation and image classification, as validated in the experiments."
}
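
The abstract describes the architecture only at a high level. Below is a minimal PyTorch sketch of that idea as it could be interpreted: a single shared Transformer layer applied for a fixed number of recurrence steps, with an input-dependent "level signal" produced by a low-rank bottleneck and added at each step. All module names, dimensions, and the exact way the signals are injected are assumptions for illustration, not the authors' implementation.

```python
# Sketch of a recurrent ("ring-like") Transformer with input-dependent level
# signals from low-rank matrices, as loosely described in the abstract.
# Names, shapes, and the additive injection of the signal are assumptions.
import torch
import torch.nn as nn

class RecurrentLayerSketch(nn.Module):
    def __init__(self, d_model=512, nhead=8, num_steps=6, rank=32):
        super().__init__()
        # One shared Transformer layer reused at every recurrence step.
        self.layer = nn.TransformerEncoderLayer(d_model, nhead, batch_first=True)
        # Low-rank projections mapping the input to a per-step level signal.
        self.down = nn.Linear(d_model, rank, bias=False)
        self.up = nn.ModuleList(
            nn.Linear(rank, d_model, bias=False) for _ in range(num_steps)
        )
        self.num_steps = num_steps

    def forward(self, x):
        h = x
        for t in range(self.num_steps):
            # Input-dependent level signal for step t via the low-rank bottleneck.
            signal = self.up[t](self.down(x))
            h = self.layer(h + signal)
        return h

# Usage: a batch of token embeddings of shape (batch, seq_len, d_model).
out = RecurrentLayerSketch()(torch.randn(2, 16, 512))
```

Compared with stacking independent layers, this reuses one layer's weights across steps, so only the small low-rank signal generators grow with the number of steps, which is the parameter-saving effect the abstract points to.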