@inproceedings{tian-etal-2025-imara,
title = "{I}ma{RA}: An Imaginative Frame Augmented Method for Low-Resource Multimodal Metaphor Detection and Explanation",
author = "Tian, Yuan and
Wang, Minzheng and
Xu, Nan and
Mao, Wenji",
editor = "Chiruzzo, Luis and
Ritter, Alan and
Wang, Lu",
booktitle = "Findings of the Association for Computational Linguistics: NAACL 2025",
month = apr,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.findings-naacl.220/",
doi = "10.18653/v1/2025.findings-naacl.220",
pages = "3953--3967",
ISBN = "979-8-89176-195-7",
    abstract = "Multimodal metaphor detection is an important and challenging task in multimedia computing that aims to distinguish between metaphorical and literal multimodal expressions. Existing studies mainly apply standard multimodal computing approaches to detection, neglecting the unique cross-domain and cross-modality characteristics underlying multimodal metaphor understanding. According to Conceptual Metaphor Theory (CMT), the inconsistency between source and target domains and their attribute similarity are essential for inferring the intricate meanings implied in metaphors. In practice, the scarcity of annotated multimodal metaphorical content in the real world adds difficulty to the detection task and further complicates the understanding of multimodal metaphors. To address these challenges, we propose a novel Imaginative FRame Augmented (ImaRA) method, inspired by CMT, for low-resource multimodal metaphor detection and explanation. Specifically, we first identify the imaginative frame as an associative structure that stimulates imaginative thinking in multimodal metaphor detection and understanding. We then construct a cross-modal imagination dataset rich in multimodal metaphors and corresponding imaginative frames, and retrieve an augmented instance from this imagination dataset using imaginative frames mined from the input. This augmented instance serves as a demonstration exemplar that boosts the metaphor reasoning ability of the multimodal large language model (MLLM) in low-resource multimodal scenarios. Experiments on two publicly available datasets show that our method consistently achieves robust results compared with MLLM-based methods for both multimodal metaphor detection and explanation in low-resource scenarios, and surpasses existing multimodal metaphor detection methods with full training data."
}
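
The retrieval step described in the abstract — mining imaginative frames from the input, fetching the closest instance from the imagination dataset, and prepending it as a one-shot demonstration for the MLLM — can be sketched roughly as follows. This is a minimal illustration, not the authors' code: the `ImaginationEntry` record, the frame-set Jaccard similarity, and the prompt template are all assumptions, and the actual method operates over image-text pairs rather than captions alone.

```python
# Hypothetical sketch of ImaRA-style exemplar retrieval; names and fields
# are illustrative assumptions, not the paper's implementation.
from dataclasses import dataclass

@dataclass
class ImaginationEntry:
    """One record in the (hypothetical) cross-modal imagination dataset."""
    image_path: str
    caption: str
    frames: set[str]      # imaginative-frame labels mined for this instance
    is_metaphor: bool
    explanation: str

def jaccard(a: set[str], b: set[str]) -> float:
    """Set-overlap similarity between two imaginative-frame sets."""
    return len(a & b) / len(a | b) if a | b else 0.0

def retrieve_exemplar(query_frames: set[str],
                      dataset: list[ImaginationEntry]) -> ImaginationEntry:
    """Pick the dataset instance whose frames best match the query's frames."""
    return max(dataset, key=lambda e: jaccard(query_frames, e.frames))

def build_prompt(exemplar: ImaginationEntry, caption: str) -> str:
    """Prepend the retrieved instance as a one-shot demonstration."""
    label = "metaphorical" if exemplar.is_metaphor else "literal"
    return (
        f"Example: '{exemplar.caption}' is {label}. {exemplar.explanation}\n"
        f"Now decide whether the following expression is metaphorical or "
        f"literal, and explain why: '{caption}'"
    )
```

In the paper's setting both the exemplar and the query carry images as well, and the assembled prompt is issued to an MLLM; the sketch only shows the frame-matching logic that makes the retrieved instance a relevant demonstration in a low-resource scenario.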