% System description paper for SemEval-2025 Task 1 (AdMIRe), as listed in the ACL Anthology.
% The url field uses the stable Anthology address for paper ID 2025.semeval-1.19.
% NOTE(review): address appears to hold the conference location rather than the
% publisher's city -- confirm this is what the target bibliography style expects.
@inproceedings{alfter-2025-daalft,
  title     = {daalft at {SemEval}-2025 Task 1: Multi-step Zero-shot Multimodal Idiomaticity Ranking},
  author    = {Alfter, David},
  editor    = {Rosenthal, Sara and Ros{\'a}, Aiala and Ghosh, Debanjan and Zampieri, Marcos},
  booktitle = {Proceedings of the 19th International Workshop on Semantic Evaluation ({SemEval}-2025)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.semeval-1.19/},
  pages     = {127--140},
  isbn      = {979-8-89176-273-2},
  abstract  = {This paper presents a multi-step zero-shot system for SemEval-2025 Task 1 on Advancing Multimodal Idiomaticity Representation (AdMIRe). The system employs two state-of-the-art multimodal language models, Claude Sonnet 3.5 and OpenAI GPT-4o, to determine idiomaticity and rank images for relevance in both subtasks. A hybrid approach combining o1-preview for idiomaticity classification and GPT-4o for visual ranking produced the best overall results. The system demonstrates competitive performance on the English extended dataset for Subtask A, but faces challenges in cross-lingual transfer to Portuguese. Comparing Image+Text and Text-Only approaches reveals interesting trends and raises questions about the role of visual information in multimodal idiomaticity detection.},
}
Markdown (Informal)
[daalft at SemEval-2025 Task 1: Multi-step Zero-shot Multimodal Idiomaticity Ranking](https://aclanthology.org/2025.semeval-1.19/) (Alfter, SemEval 2025)
ACL