@inproceedings{zhu-etal-2025-modalities,
title = "Let Modalities Teach Each Other: Modal-Collaborative Knowledge Extraction and Fusion for Multimodal Knowledge Graph Completion",
author = "Zhu, Guoliang and
Ren, Tao and
Wang, Dandan and
Hu, Jun",
editor = "Chiruzzo, Luis and
Ritter, Alan and
Wang, Lu",
booktitle = "Findings of the Association for Computational Linguistics: NAACL 2025",
month = apr,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2025.findings-naacl.346/",
pages = "6195--6207",
ISBN = "979-8-89176-195-7",
abstract = "Multimodal knowledge graph completion (MKGC) aims to predict missing triples in MKGs using multimodal information. Recent research typically either extracts information from each modality separately to predict, then ensembles the predictions at the decision stage, or projects multiple modalities into a unified feature space to learn multimodal representations for prediction. However, these methods usually overlook the intrinsic correlation between modalities in MKGs which should be leveraged in both unimodal knowledge extraction and multimodal knowledge fusion. Motivated by this, we propose a noval Modal-collaborative knowledge learning (Moodle) framework for MKGC, the key idea of which is to foster mutual guidance and collaboration during unimodal knowledge extraction, to let each modality acquire distinct and complementary knowledge that subsequently enhances the multimodal knowledge fusion. Specifically, Moodle preserves the representations of different modalities to learn unimodal knowledge while modeling the mutual guidance through multi-task learning. Furthermore, Moodle performs multimodal knowledge fusion and prediction guided by unimodal knowledge, capturing their synergistic relationships and acquire fine-grained semantic knowledge through contrastive learning. Extensive experiments on three real-world datasets demonstrate the advantages of Moodle over state-of-the-art methods."
}
Markdown (Informal)
[Let Modalities Teach Each Other: Modal-Collaborative Knowledge Extraction and Fusion for Multimodal Knowledge Graph Completion](https://preview.aclanthology.org/fix-sig-urls/2025.findings-naacl.346/) (Zhu et al., Findings 2025)
ACL