@inproceedings{cheon-kang-2025-countdown,
title = "{COUNTDOWN}: Contextually Sparse Activation Filtering Out Unnecessary Weights in Down Projection",
author = "Cheon, Jaewon and
Kang, Pilsung",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/author-page-yu-wang-polytechnic/2025.emnlp-main.1442/",
doi = "10.18653/v1/2025.emnlp-main.1442",
pages = "28381--28397",
ISBN = "979-8-89176-332-6",
abstract = "The growing size of large language models has created significant computational inefficiencies. To address this challenge, sparse activation selectively deactivates non-essential parameters during inference, reducing computational costs in FFNN layers. While existing methods focus on non-linear gating mechanisms, we hypothesize that the sparsity of the FFNN layer lies globally in the form of a linear combination over its internal down projection matrix. Based on this insight, we propose two methods: M-COUNTDOWN, leveraging indirect coefficients, and D-COUNTDOWN, utilizing direct coefficients of the linear combination. Experimental results demonstrate that D-COUNTDOWN can omit 90{\%} of computations with performance loss as low as 5.5{\%} ideally, while M-COUNTDOWN provides a predictor-free solution with up to 29.4{\%} better performance preservation compared to existing methods. Our specialized kernel implementations effectively realize these theoretical gains into substantial real-world acceleration."
}
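As a quick intuition for the abstract: the paper treats the FFNN output as a linear combination of the down projection's rows, so zeroing small coefficients lets the matching rows be skipped entirely. The sketch below illustrates that coefficient-based filtering (the D-COUNTDOWN view of "direct coefficients") in plain PyTorch. It is a minimal illustration under stated assumptions, not the authors' method or kernels: the top-k selection and the `keep_ratio` parameter are hypothetical stand-ins for whatever selection rule the paper actually uses.

```python
import torch
import torch.nn.functional as F

def countdown_style_ffnn(x, w_gate, w_up, w_down, keep_ratio=0.1):
    """Sketch of coefficient-based sparsification in a gated FFNN.

    The output is a linear combination of w_down's rows, weighted by
    the coefficients c = SiLU(x @ w_gate) * (x @ w_up). Dropping
    small-|c| entries skips the corresponding rows of w_down.
    NOTE: top-k selection via `keep_ratio` is an illustrative
    assumption, not the paper's exact selection rule.
    """
    c = F.silu(x @ w_gate) * (x @ w_up)       # direct coefficients, shape (d_ff,)
    k = max(1, int(keep_ratio * c.numel()))
    idx = torch.topk(c.abs(), k).indices      # keep the k largest-magnitude coefficients
    return c[idx] @ w_down[idx]               # only k rows of w_down are read

# Usage: with keep_ratio=0.1, 90% of the down projection is skipped.
d_model, d_ff = 8, 32
x = torch.randn(d_model)
w_gate, w_up = torch.randn(d_model, d_ff), torch.randn(d_model, d_ff)
w_down = torch.randn(d_ff, d_model)
y = countdown_style_ffnn(x, w_gate, w_up, w_down, keep_ratio=0.1)
```

In a dense implementation the gather over `w_down[idx]` saves nothing; the paper's reported speedups come from specialized kernels that avoid touching the skipped rows at all.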