% SemEval-2025 Task 1 system paper describing CAViLR (Mondal and Sarkar).
% NOTE(review): the url field points at a "preview ... corrections-2025-08" path;
% confirm whether a canonical Anthology URL should replace it before publishing.
@inproceedings{mondal-sarkar-2025-modgenix,
    title = "Modgenix at {SemEval}-2025 Task 1: Context Aware Vision Language Ranking ({CAViLR}) for Multimodal Idiomaticity Understanding",
    author = "Mondal, Joydeb and
      Sarkar, Pramir",
    editor = "Rosenthal, Sara and
      Ros{\'a}, Aiala and
      Ghosh, Debanjan and
      Zampieri, Marcos",
    booktitle = "Proceedings of the 19th International Workshop on Semantic Evaluation ({SemEval}-2025)",
    month = jul,
    year = "2025",
    address = "Vienna, Austria",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/corrections-2025-08/2025.semeval-1.106/",
    pages = "780--784",
    ISBN = "979-8-89176-273-2",
    abstract = "This paper presents CAViLR, a hybrid multimodal approach for SemEval-2025 Task 1. Our method integrates CLIP as a baseline with a Mixture of Experts (MoE) framework that dynamically selects expert models such as Pixtral-12B and Phi-3.5 based on input context. The approach addresses challenges in both image ranking and image sequence prediction, improving the alignment of visual and textual semantics. Experimental results demonstrate that our hybrid model outperforms individual models. Future work will focus on refining expert selection and enhancing disambiguation strategies for complex idiomatic expressions."
}
Markdown (Informal)
[Modgenix at SemEval-2025 Task 1: Context Aware Vision Language Ranking (CAViLR) for Multimodal Idiomaticity Understanding](https://preview.aclanthology.org/corrections-2025-08/2025.semeval-1.106/) (Mondal & Sarkar, SemEval 2025)
ACL