@inproceedings{yang-etal-2024-moe,
    title     = {{MoE}-{I}{$^2$}: Compressing Mixture of Experts Models through Inter-Expert Pruning and Intra-Expert Low-Rank Decomposition},
    author    = {Yang, Cheng and Sui, Yang and Xiao, Jinqi and Huang, Lingyi and Gong, Yu and Duan, Yuanlin and Jia, Wenqi and Yin, Miao and Cheng, Yu and Yuan, Bo},
    editor    = {Al-Onaizan, Yaser and Bansal, Mohit and Chen, Yun-Nung},
    booktitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2024},
    month     = nov,
    year      = {2024},
    address   = {Miami, Florida, USA},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2024.findings-emnlp.612/},
    doi       = {10.18653/v1/2024.findings-emnlp.612},
    pages     = {10456--10466},
}