@inproceedings{shihab-etal-2025-efficient,
title = "Efficient Unstructured Pruning of Mamba State-Space Models for Resource-Constrained Environments",
author = "Shihab, Ibne Farabi and
Akter, Sanjeda and
Sharma, Anuj",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/author-page-yu-wang-polytechnic/2025.emnlp-main.562/",
doi = "10.18653/v1/2025.emnlp-main.562",
pages = "11109--11137",
ISBN = "979-8-89176-332-6",
abstract = "As the deployment of AI models shifts towards edge devices, developing efficient sequence models has become critical. State-space models (SSMs), particularly Mamba, have emerged as strong rivals to Transformers due to their linear-time complexity and impressive performance across a range of tasks. However, their large parameter counts still hinder their use in resource-constrained environments. To address this, we propose a novel unstructured pruning framework specifically tailored for Mamba, achieving up to 70{\%} parameter reduction with only a 3{--}9{\%} drop in performance. Unlike pruning techniques designed for Transformers, our approach leverages Mamba{'}s unique recurrent dynamics by incorporating pruning based on both weight and gradient importance to preserve critical parameters, a gradual pruning schedule to maintain model stability, and a global strategy to optimize parameter allocation across the model. Extensive experiments on the WikiText-103, Long Range Arena, and ETT benchmarks demonstrate significant efficiency gains, including 1.77{\texttimes} faster inference and a 46{\%} reduction in memory usage. Our component analysis confirms Mamba{'}s robustness to pruning, highlighting the framework{'}s potential for enabling practical deployment while underscoring the need for careful evaluation to avoid introducing biases in sensitive applications."
}

Markdown (Informal)
[Efficient Unstructured Pruning of Mamba State-Space Models for Resource-Constrained Environments](https://aclanthology.org/2025.emnlp-main.562/) (Shihab et al., EMNLP 2025)
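The abstract describes a pruning recipe built from three parts: an importance score combining weight magnitude and gradient information, a gradual sparsity schedule, and a global (cross-layer) allocation of which parameters to drop. Below is a minimal PyTorch sketch of that general recipe, not the authors' code: the |w|·|grad| criterion, the cubic ramp, and all function names are illustrative assumptions based only on the abstract.

```python
import torch


def global_gradient_magnitude_prune(model, sparsity):
    """Zero out the globally least-important weights.

    Importance here is |w| * |grad| -- one common way to combine weight
    and gradient information; the paper's exact criterion may differ.
    Assumes a backward pass has already populated .grad on parameters.
    """
    scores = []
    for p in model.parameters():
        if p.grad is None or p.dim() < 2:  # skip biases / gradient-free params
            continue
        scores.append((p.abs() * p.grad.abs()).flatten())
    all_scores = torch.cat(scores)
    k = int(sparsity * all_scores.numel())
    if k == 0:
        return
    # One global threshold across all layers, rather than a per-layer quota.
    threshold = torch.kthvalue(all_scores, k).values
    with torch.no_grad():
        for p in model.parameters():
            if p.grad is None or p.dim() < 2:
                continue
            mask = (p.abs() * p.grad.abs()) > threshold
            p.mul_(mask)


def gradual_sparsity(step, total_steps, final_sparsity=0.70):
    """Cubic ramp from 0 to the target sparsity (a Zhu & Gupta-style
    schedule, assumed here; the abstract only says 'gradual')."""
    t = min(step / total_steps, 1.0)
    return final_sparsity * (1.0 - (1.0 - t) ** 3)
```

Called inside a fine-tuning loop as `global_gradient_magnitude_prune(model, gradual_sparsity(step, total_steps))`, this ramps toward the 70% parameter reduction the paper reports while letting the model adapt between pruning steps.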