@inproceedings{huang-etal-2025-lora,
title = "{L}o{RA}-{PAR}: A Flexible Dual-System {L}o{RA} Partitioning Approach to Efficient {LLM} Fine-Tuning",
author = "Huang, Yining and
Li, Bin and
Tang, Keke and
Chen, Meilian",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/author-page-yu-wang-polytechnic/2025.findings-emnlp.738/",
doi = "10.18653/v1/2025.findings-emnlp.738",
pages = "13693--13704",
ISBN = "979-8-89176-335-7",
abstract = "Large-scale generative models like DeepSeek-R1 and OpenAI-O1 benefit substantially from chain-of-thought (CoT) reasoning, yet pushing their performance typically requires vast data, large model sizes, and full-parameter fine-tuning. While parameter-efficient fine-tuning (PEFT) helps reduce cost, most existing approaches primarily address domain adaptation or layer-wise allocation rather than explicitly tailoring data and parameters to different response demands. Inspired by ``Thinking, Fast and Slow,'' which characterizes two distinct modes of thought{---}System 1 (fast, intuitive, often automatic) and System 2 (slower, more deliberative and analytic){---}we draw an analogy that different ``subregions'' of an LLM{'}s parameters might similarly specialize for tasks that demand quick, intuitive responses versus those requiring multi-step logical reasoning. Therefore, we propose LoRA-PAR, a dual-system LoRA framework that partitions both data and parameters by System 1 or System 2 demands, using fewer yet more focused parameters for each task. Specifically, we classify task data via multi-model role-playing and voting, and partition parameters based on importance scoring, then adopt a two-stage fine-tuning strategy of training System 1 tasks with supervised fine-tuning (SFT) to enhance knowledge and intuition and refine System 2 tasks with reinforcement learning (RL) to reinforce deeper logical deliberation next. Extensive experiments show that the two-stage fine-tuning strategy, SFT and RL, lowers active parameter usage while matching or surpassing SOTA PEFT baselines."
}