% Conference paper entry: proceedings of the Conference on Computational
% Natural Language Learning, published by the Association for Computational
% Linguistics. Brace groups in the title protect proper nouns and acronyms
% from style-driven sentence casing.
% NOTE(review): the url field points at a preview/ingest host rather than a
% canonical address -- confirm the final URL before this entry is released.
@inproceedings{laba-hryniv-2026-sparse,
  title     = {From Sparse to Sense-Grounded: {Wikipedia} Training for {Ukrainian} Visual-{WSD}},
  author    = {Laba, Yurii and
               Hryniv, Rostyslav O.},
  editor    = {Bonial, Claire and
               Berzak, Yevgeni},
  booktitle = {Proceedings of the 30th Conference on Computational Natural Language Learning},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.conll-main.29/},
  pages     = {501--514},
  isbn      = {979-8-89176-410-1},
  abstract  = {Visual Word Sense Disambiguation (Visual-WSD) requires ranking the correct image for an ambiguous word given a short trigger phrase. For low-resource languages, it is bottlenecked by scarce sense-level benchmarks and limited sense-aligned multimodal supervision. We study Ukrainian and (i) extend the Ukrainian Visual-WSD benchmark from 87 to 381 instances and benchmark multilingual CLIP checkpoints and multimodal large models, and (ii) introduce two scalable Wikipedia-derived dataset construction methods. Using compute-efficient adaptation we fine-tune a multilingual CLIP backbone and show that sense-grounded supervision drives the improvements: combining our two Wikipedia-derived datasets improves HIT@1 from 37.00{\%} to 43.05{\%}.},
}
Markdown (Informal)
[From Sparse to Sense-Grounded: Wikipedia Training for Ukrainian Visual-WSD](https://preview.aclanthology.org/ingest-acl-workshops/2026.conll-main.29/) (Laba & Hryniv, CoNLL 2026)
ACL