% Conference-paper record for "Memory-Efficient Structured Backpropagation
% for On-Device LLM Fine-Tuning" (Park, Hong, Kim, Lee; 2026), pages 906--916.
% NOTE(review): the url field points at a preview.aclanthology.org/ingest-acl
%   host, which looks like a staging location -- confirm the permanent URL
%   before relying on it.
% NOTE(review): address holds "San Diego, California, USA"; presumably this is
%   the meeting location rather than the publisher's city -- verify against
%   the bibliography style in use.
@inproceedings{park-etal-2026-memory,
  title     = {Memory-Efficient Structured Backpropagation for On-Device {LLM} Fine-Tuning},
  author    = {Park, JuneYoung and
               Hong, Yuri and
               Kim, Seongwan and
               Lee, Jaeho},
  editor    = {Li, Yunyao and
               Rehm, Georg and
               Tu, Mei},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} ({ACL} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-industry.62/},
  pages     = {906--916},
  isbn      = {979-8-89176-394-4},
  abstract  = {On-device fine-tuning enables privacy-preserving personalization of large language models, but mobile devices impose severe memory constraints, typically 6{--}12GB shared across all workloads. Existing approaches force a trade-off between exact gradients with high memory (MeBP) and low memory with noisy estimates (MeZO). We propose Memory-efficient Structured Backpropagation (MeSP), which bridges this gap by manually deriving backward passes that exploit LoRA{'}s low-rank structure. Our key insight is that the intermediate projection $h = xA$ can be recomputed during backward at minimal cost since rank $r \ll d_{in}$, eliminating the need to store it. MeSP achieves 49{\%} average memory reduction compared to MeBP on Qwen2.5 models (0.5B{--}3B) while computing mathematically identical gradients. Our analysis also reveals that MeZO{'}s gradient estimates show near-zero correlation with true gradients (cosine similarity $\approx$0.001), explaining its slow convergence. MeSP reduces peak memory from 361MB to 136MB for Qwen2.5-0.5B, enabling fine-tuning scenarios previously infeasible on memory-constrained devices.},
}
Markdown (Informal)
[Memory-Efficient Structured Backpropagation for On-Device LLM Fine-Tuning](https://preview.aclanthology.org/ingest-acl/2026.acl-industry.62/) (Park et al., ACL 2026)
ACL