% Conference paper record (Findings of ACL 2026): COMPEL, layer-adaptive expert pruning for MoE models.
% NOTE(review): the url field targets a preview/ingest host; confirm the permanent address
% (and add a doi field if one exists) before relying on this record.
@inproceedings{yoon-choi-2026-compel,
  title     = {{COMPEL}: Compensated Mixture-of-Experts Pruning with Expert-Layer distribution},
  author    = {Yoon, Seohee and Choi, Yong Suk},
  editor    = {Liakata, Maria and Moreira, Viviane P. and Zhang, Jiajun and Jurgens, David},
  booktitle = {Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1521/},
  pages     = {30408--30423},
  isbn      = {979-8-89176-395-1},
  abstract  = {Mixture-of-Experts (MoE) architectures have emerged as an effective approach for scaling Large Language Models (LLMs) by activating only a subset of experts during inference. Despite their computational efficiency, MoE models incur a substantial memory bottleneck from maintaining all expert parameters during inference. To address this challenge, numerous MoE pruning methods have been proposed. However, most existing methods adopt uniform pruning across layers, which fails to capture layer-wise variations in expert importance and redundancy. In this paper, we propose COmpensated MoE Pruning with Expert-Layer distribution (COMPEL). COMPEL performs layer-adaptive expert pruning by estimating expert importance using Fisher information and deriving layer importance from layer-wise outlier distributions, enabling pruning decisions that capture layer-wise heterogeneity. Furthermore, to mitigate performance degradation resulting from expert pruning, we propose a Fisher information guided expert weight compensation method. Experimental results on the Qwen1.5-MoE-A2.7B achieve near lossless performance at 25{\%} expert pruning and maintains performance within a 4{\%} margin even at 50{\%} pruning. Moreover, COMPEL consistently outperforms existing pruning methods while substantially reducing inference latency and peak GPU memory usage.},
}
Markdown (Informal)
[COMPEL: Compensated Mixture-of-Experts Pruning with Expert-Layer distribution](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1521/) (Yoon & Choi, Findings 2026)
ACL