@inproceedings{vejendla-2025-slicemoe,
    title = "{SliceMoE}: Routing Embedding Slices Instead of Tokens for Fine-Grained and Balanced Transformer Scaling",
    author = "Vejendla, Harshil",
    editor = "Christodoulopoulos, Christos and
      Chakraborty, Tanmoy and
      Rose, Carolyn and
      Peng, Violet",
    booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing",
    month = nov,
    year = "2025",
    address = "Suzhou, China",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.emnlp-main.807/",
    pages = "15982--15989",
    isbn = "979-8-89176-332-6",
    abstract = "Mixture-of-Experts (MoE) layers scale transformers by routing tokens to a sparse subset of feed-forward experts. Token-level routing, however, assigns an entire semantic spectrum to each expert, creating capacity bottlenecks, load-balancing pathologies, and limited specialisation. We introduce SliceMoE, an architecture that routes contiguous slices of a token{'}s hidden vector. A d-dimensional embedding is partitioned into S slices, and for each slice, a lightweight shared router predicts the top-k experts. Experts operate on their assigned slices independently, and outputs are re-assembled, maintaining per-token FLOP efficiency. Because slices from different tokens interleave within an expert, utilisation is naturally smoother. We propose a slice-level capacity loss, cross-slice dropout, and efficient fused batched-GEMM kernels. Experiments on WikiText-103 language modelling, WMT En{--}De translation, and three text-classification datasets show SliceMoE attains up to 1.7x faster inference than dense baselines, 12{--}18{\%} lower perplexity than parameter-matched token-MoE, and improved expert balance, with interpretable expertise over syntactic versus semantic sub-spaces."
}
Markdown (Informal)
[SliceMoE: Routing Embedding Slices Instead of Tokens for Fine-Grained and Balanced Transformer Scaling](https://aclanthology.org/2025.emnlp-main.807/) (Vejendla, EMNLP 2025)
ACL