@comment{NOTE(review): the third author, Shiqixu, is a single token in the source record, so BibTeX treats it as a surname with no given name. It may be a given name and family name run together; confirm against the paper before citing. The url field uses the canonical ACL Anthology address rather than the temporary preview.aclanthology.org ingest-branch address found in the exported record.}
@inproceedings{chen-etal-2026-experts,
  title     = {From Experts to Bases: Orthogonal Subspace Mixture for Continual Multimodal Instruction Tuning},
  author    = {Chen, Pei and
               Wang, Xilai and
               Shiqixu and
               Li, Zejian and
               Sun, Lingyun},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.acl-long.481/},
  pages     = {10545--10561},
  isbn      = {979-8-89176-390-6},
  abstract  = {Multimodal Continual Instruction Tuning (MCIT) is essential for adapting Multimodal Large Language Models (MLLMs) to dynamic data streams, yet preventing catastrophic forgetting remains a major challenge. Existing parameter-efficient approaches often face a dilemma: fixed architectures suffer from knowledge interference, while dynamic strategies incur inefficient capacity expansion, limiting scalability. We propose MoBLoRA (Mixture-of-Bases LoRA), a novel framework for MCIT. Motivated by our geometric analysis revealing subspace redundancy across sequential tasks, MoBLoRA shifts the paradigm from expert selection to subspace mixing: it decomposes adaptation weights into a globally shared pool of orthonormal bases to capture task-invariant knowledge, and lightweight mixing matrices to encode task-specific variations. This design effectively decouples knowledge accumulation from task reconstruction. Experiments on standard benchmarks show MoBLoRA significantly outperforms state-of-the-art methods while maintaining superior parameter efficiency.},
}
Markdown (Informal)
[From Experts to Bases: Orthogonal Subspace Mixture for Continual Multimodal Instruction Tuning](https://aclanthology.org/2026.acl-long.481/) (Chen et al., ACL 2026)
ACL