@comment{
  Findings of ACL 2026 paper introducing SCOPE (see the title and booktitle fields).
  NOTE(review): the url field is on the preview.aclanthology.org ingest-acl host,
  presumably a staging address -- confirm the final URL before relying on it.
  NOTE(review): address holds the conference location rather than a publisher
  city; this looks like the ACL Anthology export convention -- confirm against
  the target bibliography style.
}
@inproceedings{yang-etal-2026-scope,
  title     = {{SCOPE}: Preserving Modality-Specific Cues to Mitigate Modality Laziness in Multimodal Learning},
  author    = {Yang, Jingfan and
               Zhang, Rui and
               Hong, Liang and
               Yuan, Ke},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1453/},
  pages     = {29066--29078},
  isbn      = {979-8-89176-395-1},
  abstract  = {Multimodal learning aims to learn unified multimodal representations from heterogeneous modalities and supports many natural language processing tasks. However, multimodal models often exhibit modality laziness: over-relying on a dominant modality and under-exploiting complementary signals. Existing approaches typically strengthen unimodal training or rebalance modality contributions, but they may still emphasize shared semantics and overlook modality-specific cues. To address this, we propose SCOPE, a unified framework for learning complete multimodal representations, achieving Shared-and-COmplementary cue PrEservation. Firstly, SCOPE uses a mutual information-guided disentanglement module to separate shared semantics from modality-specific cues and mitigate representation collapse. Secondly, SCOPE aligns modalities by enforcing structural consistency between modality-wise semantic graphs, avoiding brittle point-wise matching. Finally, SCOPE performs balanced fusion via structure-aware diffusion attention to integrate shared and complementary cues without feature homogenization. Experiments on four benchmark datasets show that SCOPE consistently outperforms SOTA baselines, achieving up to 27.10{\%} accuracy improvement.},
}
Markdown (Informal)
[SCOPE: Preserving Modality-Specific Cues to Mitigate Modality Laziness in Multimodal Learning](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1453/) (Yang et al., Findings 2026)
ACL