@inproceedings{ji-etal-2025-capturing,
  title     = {Capturing Latent Modal Association For Multimodal Entity Alignment},
  author    = {Ji, Yongquan and
               Cheng, Jingwei and
               Zhang, Fu and
               Lu, Chenglong},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2025},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-emnlp.1213/},
  doi       = {10.18653/v1/2025.findings-emnlp.1213},
  pages     = {22278--22293},
  isbn      = {979-8-89176-335-7},
  abstract  = {Multimodal entity alignment aims to identify equivalent entities in heterogeneous knowledge graphs by leveraging complementary information from multiple modalities. However, existing methods often overlook the quality of input modality embeddings during modality interaction {--} such as missing modality generation, modal information transfer, modality fusion {--} which may inadvertently amplify noise propagation while suppressing discriminative feature representations. To address these issues, we propose a novel model {--} CLAMEA for capturing latent modal association for multimodal entity alignment. Specifically, we use a self-attention mechanism to enhance salient information while attenuating noise within individual modality embeddings. We design a dynamic modal attention flow fusion module to capture and balance latent intra- and inter-modal associations and generate fused modality embeddings. Based on both fused and available modalities, we adopt variational autoencoder (VAE) to generate high quality embeddings for the missing modality. We use a cross-modal association extraction module to extract latent modal associations from the completed modality embeddings, further enhancing embedding quality. Experimental results on two real-world datasets demonstrate the effectiveness of our approach, which achieves an absolute 3.1{\%} higher Hits@1 score than the sota method.}
}

@comment{Markdown (Informal):
[Capturing Latent Modal Association For Multimodal Entity Alignment](https://aclanthology.org/2025.findings-emnlp.1213/) (Ji et al., Findings 2025)
ACL
}