% Workshop paper (SustaiNLP 2023, ACL Anthology ID 2023.sustainlp-1.5).
% The url field is the stable Anthology landing page, not a preview/staging build.
@inproceedings{takase-kiyono-2023-lessons,
  title     = {Lessons on Parameter Sharing across Layers in {Transformers}},
  author    = {Takase, Sho and
               Kiyono, Shun},
  editor    = {Sadat Moosavi, Nafise and
               Gurevych, Iryna and
               Hou, Yufang and
               Kim, Gyuwan and
               Kim, Young Jin and
               Schuster, Tal and
               Agrawal, Ameeta},
  booktitle = {Proceedings of the Fourth Workshop on Simple and Efficient Natural Language Processing ({SustaiNLP})},
  month     = jul,
  year      = {2023},
  address   = {Toronto, Canada (Hybrid)},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.sustainlp-1.5/},
  doi       = {10.18653/v1/2023.sustainlp-1.5},
  pages     = {78--90},
}
Markdown (Informal)
[Lessons on Parameter Sharing across Layers in Transformers](https://aclanthology.org/2023.sustainlp-1.5/) (Takase & Kiyono, SustaiNLP 2023)
ACL