% MoEfication (Zhang et al.), Findings of ACL 2022.
% The url field uses the canonical ACL Anthology permalink rather than a
% preview/staging deployment link, which is not stable.
@inproceedings{zhang-etal-2022-moefication,
  author    = {Zhang, Zhengyan and Lin, Yankai and Liu, Zhiyuan and Li, Peng and Sun, Maosong and Zhou, Jie},
  title     = {{MoEfication}: Transformer Feed-forward Layers are Mixtures of Experts},
  editor    = {Muresan, Smaranda and Nakov, Preslav and Villavicencio, Aline},
  booktitle = {Findings of the Association for Computational Linguistics: {ACL} 2022},
  month     = may,
  year      = {2022},
  address   = {Dublin, Ireland},
  publisher = {Association for Computational Linguistics},
  pages     = {877--890},
  doi       = {10.18653/v1/2022.findings-acl.71},
  url       = {https://aclanthology.org/2022.findings-acl.71/},
}