% Press, Smith & Levy -- ACL 2020 main-conference paper (Anthology ID 2020.acl-main.270).
% The url field uses the canonical ACL Anthology address (same ID as the DOI suffix),
% not a temporary preview-branch link. {Transformer} is braced so that
% sentence-casing bibliography styles keep its capital letter.
@inproceedings{press-etal-2020-improving,
  author    = {Press, Ofir and Smith, Noah A. and Levy, Omer},
  title     = {Improving {Transformer} Models by Reordering their Sublayers},
  editor    = {Jurafsky, Dan and Chai, Joyce and Schluter, Natalie and Tetreault, Joel},
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
  month     = jul,
  year      = {2020},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  pages     = {2996--3005},
  doi       = {10.18653/v1/2020.acl-main.270},
  url       = {https://aclanthology.org/2020.acl-main.270/},
}