% Zhu et al., ACL 2023 Industry Track: BADGE (block-wise bypasses and
% divergence-based early exiting for faster BERT inference).
% The url field uses the permanent ACL Anthology address, not a preview build.
@inproceedings{zhu-etal-2023-badge,
  author    = {Zhu, Wei and Wang, Peng and Ni, Yuan and Xie, Guotong and Wang, Xiaoling},
  title     = {{BADGE}: Speeding Up {BERT} Inference after Deployment via Block-wise Bypasses and Divergence-based Early Exiting},
  editor    = {Sitaram, Sunayana and Beigman Klebanov, Beata and Williams, Jason D.},
  booktitle = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 5: Industry Track)},
  month     = jul,
  year      = {2023},
  address   = {Toronto, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {500--509},
  doi       = {10.18653/v1/2023.acl-industry.48},
  url       = {https://aclanthology.org/2023.acl-industry.48/},
}