% LayerSkip paper, ACL 2024 main conference (Volume 1: Long Papers).
% The url field holds the canonical ACL Anthology permalink (not a preview-branch
% mirror); its trailing ID matches the DOI suffix. The camelCase name in the title
% is brace-protected as a whole word so sentence-casing styles keep it intact.
@inproceedings{elhoushi-etal-2024-layerskip,
  author    = {Elhoushi, Mostafa and
               Shrivastava, Akshat and
               Liskovich, Diana and
               Hosmer, Basil and
               Wasti, Bram and
               Lai, Liangzhen and
               Mahmoud, Anas and
               Acun, Bilge and
               Agarwal, Saurabh and
               Roman, Ahmed and
               Aly, Ahmed and
               Chen, Beidi and
               Wu, Carole-Jean},
  title     = {{LayerSkip}: Enabling Early Exit Inference and Self-Speculative Decoding},
  editor    = {Ku, Lun-Wei and
               Martins, Andre and
               Srikumar, Vivek},
  booktitle = {Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = aug,
  year      = {2024},
  address   = {Bangkok, Thailand},
  publisher = {Association for Computational Linguistics},
  pages     = {12622--12642},
  doi       = {10.18653/v1/2024.acl-long.681},
  url       = {https://aclanthology.org/2024.acl-long.681/},
}