@inproceedings{grimshaw-etal-2024-sheffieldveraai,
title = "{S}heffield{V}era{AI} at {S}em{E}val-2024 Task 4: Prompting and fine-tuning a Large Vision-Language Model for Binary Classification of Persuasion Techniques in Memes",
author = "Grimshaw, Charlie and
Bontcheva, Kalina and
Song, Xingyi",
editor = {Ojha, Atul Kr. and
Do{\u{g}}ru{\"o}z, A. Seza and
Tayyar Madabushi, Harish and
Da San Martino, Giovanni and
Rosenthal, Sara and
Ros{\'a}, Aiala},
booktitle = "Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2024.semeval-1.278/",
doi = "10.18653/v1/2024.semeval-1.278",
pages = "2051--2056",
abstract = "This paper describes our approach for SemEval-2024 Task 4: Multilingual Detection of Persuasion Techniques in Memes. Specifically, we concentrate on Subtask 2b, a binary classification challenge that entails categorizing memes as either {\textquotedblleft}propagandistic{\textquotedblright} or {\textquotedblleft}non-propagandistic{\textquotedblright}. To address this task, we utilized the large multimodal pretrained model, LLaVa. We explored various prompting strategies and fine-tuning methods, and observed that the model, when not fine-tuned but provided with a few-shot learning examples, achieved the best performance. Additionally, we enhanced the model`s multilingual capabilities by integrating a machine translation model. Our system secured the 2nd place in the Arabic language category."
}