@inproceedings{li-etal-2024-lmeme,
    title     = {{LMEME} at {S}em{E}val-2024 Task 4: Teacher Student Fusion - Integrating {CLIP} with {LLM}s for Enhanced Persuasion Detection},
    author    = {Li, Shiyi and
                 Wang, Yike and
                 Yang, Liang and
                 Zhang, Shaowu and
                 Lin, Hongfei},
    editor    = {Ojha, Atul Kr. and
                 Do{\u{g}}ru{\"o}z, A. Seza and
                 Tayyar Madabushi, Harish and
                 Da San Martino, Giovanni and
                 Rosenthal, Sara and
                 Ros{\'a}, Aiala},
    booktitle = {Proceedings of the 18th International Workshop on Semantic Evaluation ({S}em{E}val-2024)},
    month     = jun,
    year      = {2024},
    address   = {Mexico City, Mexico},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2024.semeval-1.92/},
    doi       = {10.18653/v1/2024.semeval-1.92},
    pages     = {628--633},
    abstract  = {This paper describes our system used in the SemEval-2024 Task 4 Multilingual Detection of Persuasion Techniques in Memes. Our team proposes a detection system that employs a Teacher Student Fusion framework. Initially, a Large Language Model serves as the teacher, engaging in abductive reasoning on multimodal inputs to generate background knowledge on persuasion techniques, assisting in the training of a smaller downstream model. The student model adopts CLIP as an encoder for text and image features, and we incorporate an attention mechanism for modality alignment. Ultimately, our proposed system achieves a Macro-F1 score of 0.8103, ranking 1st out of 20 on the leaderboard of Subtask 2b in English. In Bulgarian, Macedonian and Arabic, our detection capabilities are ranked 1/15, 3/15 and 14/15.}
}
Markdown (Informal)
[LMEME at SemEval-2024 Task 4: Teacher Student Fusion - Integrating CLIP with LLMs for Enhanced Persuasion Detection](https://aclanthology.org/2024.semeval-1.92/) (Li et al., SemEval 2024)
ACL