@inproceedings{dai-etal-2024-deepseekmoe, title = "{D}eep{S}eek{M}o{E}: Towards Ultimate Expert Specialization in Mixture-of-Experts Language Models", author = "Dai, Damai and Deng, Chengqi and Zhao, Chenggang and Xu, R.x. and Gao, Huazuo and Chen, Deli and Li, Jiashi and Zeng, Wangding and Yu, Xingkai and Wu, Y. and Xie, Zhenda and Li, Y.k. and Huang, Panpan and Luo, Fuli and Ruan, Chong and Sui, Zhifang and Liang, Wenfeng", editor = "Ku, Lun-Wei and Martins, Andre and Srikumar, Vivek", booktitle = "Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)", month = aug, year = "2024", address = "Bangkok, Thailand", publisher = "Association for Computational Linguistics", url = "https://preview.aclanthology.org/add-emnlp-2024-awards/2024.acl-long.70/", doi = "10.18653/v1/2024.acl-long.70", pages = "1280--1297" }