@inproceedings{zhang-etal-2025-3dm,
  title     = {{3DM}: Distill, Dynamic Drop, and Merge for Debiasing Multi-modal Large Language Models},
  author    = {Zhang, Zhaoxi and
               Lee, Sanwoo and
               Wang, Zhixiang and
               Wu, Yunfang},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Findings of the Association for Computational Linguistics: {ACL} 2025},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-acl.722/},
  pages     = {14049--14059},
  isbn      = {979-8-89176-256-5},
  abstract  = {The rapid advancement of Multi-modal Language Models (MLLMs) has significantly enhanced performance in multimodal tasks, yet these models often exhibit inherent biases that compromise their reliability and fairness. Traditional debiasing methods face a trade-off between the need for extensive labeled datasets and high computational costs. Model merging, which efficiently combines multiple models into a single one, offers a promising alternative but its usage is limited to MLLMs with the same architecture. We propose 3DM, a novel framework integrating Distill, Dynamic Drop, and Merge to address these challenges. 3DM employs knowledge distillation to harmonize models with divergent architectures and introduces a dynamic dropping strategy that assigns parameter-specific drop rates based on their contributions to bias and overall performance. This approach preserves critical weights while mitigating biases, as validated on the MMSD2.0 sarcasm detection dataset. Our key contributions include architecture-agnostic merging, dynamic dropping, and the introduction of the Bias Ratio (BR) metric for systematic bias assessment. Empirical results demonstrate that 3DM outperforms existing methods in balancing debiasing and enhancing the overall performance, offering a practical and scalable solution for deploying fair and efficient MLLMs in real-world applications.},
}
Markdown (Informal)
[3DM: Distill, Dynamic Drop, and Merge for Debiasing Multi-modal Large Language Models](https://aclanthology.org/2025.findings-acl.722/) (Zhang et al., Findings 2025)
ACL