@comment{
  xv-etal-2026-beyond: Findings of ACL 2026 paper proposing Duo-SVD
  (SVD-based LLM compression).
  NOTE(review): the url field points at the Anthology preview/ingest host;
  presumably it should be swapped for the canonical aclanthology.org URL
  once the volume is published -- confirm before release.
}
@inproceedings{xv-etal-2026-beyond,
    title     = {Beyond Uniform {SVD}: Dual-Level Optimization across Columns and Modules for {LLM} Compression},
    author    = {Xv, Lin and
                 Gao, Xian and
                 Liu, Ting and
                 Fu, Yuzhuo},
    editor    = {Liakata, Maria and
                 Moreira, Viviane P. and
                 Zhang, Jiajun and
                 Jurgens, David},
    booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
    month     = jul,
    year      = {2026},
    address   = {San Diego, California, United States},
    publisher = {Association for Computational Linguistics},
    url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.912/},
    pages     = {18335--18349},
    isbn      = {979-8-89176-395-1},
    abstract  = {Low-rank decomposition, particularly Singular Value Decomposition (SVD), is a pivotal technique for mitigating the storage and computational demands of Large Language Models (LLMs). However, prevalent SVD-based approaches overlook the critical phenomenon that decomposition errors exhibit significant disparity across different components of the parameter matrix, often leading to suboptimal approximation. Furthermore, existing methods lack a direct metric to evaluate the importance of individual weight matrices. To address these limitations, we propose **Duo-SVD** (**Du**al-level **O**ptimization **SVD**), a novel training-free framework that synergizes optimization at both the column and the module levels. First, Duo-SVD incorporates a Column-Preserving Strategy that explicitly retains columns exhibiting high decomposition errors, while applying low-rank approximation solely to those with lower errors. Second, at the module level, we employ a Module-Adaptive Allocation Strategy that formulates ratio allocation as a global constrained optimization problem based on perturbation-induced model deviation. Extensive experiments demonstrate that Duo-SVD consistently outperforms state-of-the-art SVD-based baselines and structured pruning methods, establishing it as a superior paradigm for efficient LLM compression.},
}
Markdown (Informal)
[Beyond Uniform SVD: Dual-Level Optimization across Columns and Modules for LLM Compression](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.912/) (Xv et al., Findings 2026)
ACL