@inproceedings{umut-senceylan-2026-itunlp2,
    title     = {{ITUNLP2} at {MWE}-2026 {AdMIRe} 2: Modular Zero-Shot Pipelines for Multimodal Idiom Grounding and Ranking},
    author    = {Umut, {\"O}zge and
                 {\c{S}}enceylan, Bora},
    editor    = {Ojha, Atul Kr. and
                 Mititelu, Verginica Barbu and
                 Constant, Mathieu and
                 Stoyanova, Ivelina and
                 Do{\u{g}}ru{\"o}z, A. Seza and
                 Rademaker, Alexandre},
    booktitle = {Proceedings of the 22nd Workshop on Multiword Expressions ({MWE} 2026)},
    month     = mar,
    year      = {2026},
    address   = {Rabat, Morocco},
    publisher = {Association for Computational Linguistics},
    url       = {https://preview.aclanthology.org/ingest-eacl/2026.mwe-1.32/},
    pages     = {248--253},
    isbn      = {979-8-89176-363-0},
    abstract  = {We describe a zero-shot system for AdMIRe 2.0, a shared task on multimodal understanding of potentially idiomatic expressions (PIEs). Given a context sentence with a PIE and five candidate images, the system predicts whether the usage is literal or idiomatic and ranks images by how well they match the intended meaning. We use closed-source large multimodal models and compare prompting pipelines from direct one-step ranking to modular multi-step pipelines that separate sense prediction, PIE-focused image semantics, and final ranking. All steps produce constrained JSON outputs to enable deterministic parsing and composition. In the official AdMIRe 2.0 evaluation on CodaBench, our best pipeline achieves an average Top-1 accuracy of 0.52 and an average nDCG score of 0.70 across the 12 languages we submitted. We obtain the best score among submitted systems in 10 of these languages.},
}
@comment{Copy-paste residue from the Anthology page, kept for reference:
Markdown (Informal)
[ITUNLP2 at MWE-2026 AdMIRe 2: Modular Zero-Shot Pipelines for Multimodal Idiom Grounding and Ranking](https://preview.aclanthology.org/ingest-eacl/2026.mwe-1.32/) (Umut & Şenceylan, MWE 2026)
ACL
}