@inproceedings{lv-etal-2023-lightformer,
title = "{L}ight{F}ormer: Light-weight Transformer Using {SVD}-based Weight Transfer and Parameter Sharing",
author = "Lv, Xiuqing and
Zhang, Peng and
Li, Sunzhu and
Gan, Guobing and
Sun, Yueheng",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2023",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2023.findings-acl.656/",
doi = "10.18653/v1/2023.findings-acl.656",
pages = "10323--10335",
abstract = "Transformer has become an important technique for natural language processing tasks with great success. However, it usually requires huge storage space and computational cost, making it difficult to be deployed on resource-constrained edge devices. To compress and accelerate Transformer, we propose LightFormer, which adopts a low-rank factorization initialized by SVD-based weight transfer and parameter sharing. The SVD-based weight transfer can effectively utilize the well-trained Transformer parameter knowledge to speed up the model convergence, and effectively alleviate the low-rank bottleneck problem combined with parameter sharing. We validate our method on machine translation, text summarization and text classification tasks. Experiments show that on IWSLT`14 De-En and WMT`14 En-De, LightFormer achieves similar performance to the baseline Transformer with 3.8 times and 1.8 times fewer parameters, and achieves 2.3 times speedup and 1.5 times speedup respectively, generally outperforming recent light-weight Transformers."
}
Markdown (Informal)
[LightFormer: Light-weight Transformer Using SVD-based Weight Transfer and Parameter Sharing](https://preview.aclanthology.org/jlcl-multiple-ingestion/2023.findings-acl.656/) (Lv et al., Findings 2023)
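The abstract describes initializing a low-rank factorization from a well-trained weight matrix via SVD, so the factorized layers start from the dominant singular directions instead of a random init. Below is a minimal NumPy sketch of that idea; the function name, shapes, and rank are illustrative assumptions, not the authors' implementation.

```python
import numpy as np

def svd_init_low_rank(W, rank):
    """Factor a trained weight matrix W (d_out x d_in) into A (d_out x rank)
    and B (rank x d_in) so that A @ B approximates W, using the top-`rank`
    singular triplets. This mirrors SVD-based weight transfer: the low-rank
    replacement layers inherit the trained weights' structure."""
    U, S, Vt = np.linalg.svd(W, full_matrices=False)
    sqrt_S = np.sqrt(S[:rank])          # split the singular values across both factors
    A = U[:, :rank] * sqrt_S            # (d_out, rank)
    B = sqrt_S[:, None] * Vt[:rank]     # (rank, d_in)
    return A, B

# Illustrative usage: factor a 512x512 projection matrix to rank 64 and
# report the relative approximation error.
W = np.random.randn(512, 512).astype(np.float32)
A, B = svd_init_low_rank(W, rank=64)
print(np.linalg.norm(W - A @ B) / np.linalg.norm(W))
```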