@inproceedings{belikova-kosenko-2024-deeppavlov,
title = "{D}eep{P}avlov at {S}em{E}val-2024 Task 3: Multimodal Large Language Models in Emotion Reasoning",
author = "Belikova, Julia and
Kosenko, Dmitrii",
editor = {Ojha, Atul Kr. and
Do{\u{g}}ru{\"o}z, A. Seza and
Tayyar Madabushi, Harish and
Da San Martino, Giovanni and
Rosenthal, Sara and
Ros{\'a}, Aiala},
booktitle = "Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/Ingest-2025-COMPUTEL/2024.semeval-1.249/",
doi = "10.18653/v1/2024.semeval-1.249",
pages = "1747--1757",
    abstract = "This paper presents the solution of the DeepPavlov team for the Multimodal Sentiment Cause Analysis competition in SemEval-2024 Task 3, Subtask 2 (Wang et al., 2024). In the evaluation leaderboard, our approach ranks 7th with an F1-score of 0.2132. Large Language Models (LLMs) are transformative in their ability to comprehend and generate human-like text. With recent advancements, Multimodal Large Language Models (MLLMs) have expanded LLM capabilities, integrating different modalities such as audio, vision, and language. Our work delves into the state-of-the-art MLLM Video-LLaMA, its associated modalities, and its application to the emotion reasoning downstream task, Multimodal Emotion Cause Analysis in Conversations (MECAC). We investigate the model's performance in several modes: zero-shot, few-shot, individual embeddings, and fine-tuned, providing insights into their limits and potential enhancements for emotion understanding."
}