@comment{ACL Anthology record for a SemEval-2023 Task 1 system paper (zero-shot Visual WSD). The abstract is quoted from the published paper; keep its wording verbatim.}
@inproceedings{li-etal-2023-augmenters,
  title     = {Augmenters at {SemEval}-2023 Task 1: Enhancing {CLIP} in Handling Compositionality and Ambiguity for Zero-Shot Visual {WSD} through Prompt Augmentation and Text-To-Image Diffusion},
  author    = {Li, Jie and
               Shiue, Yow-Ting and
               Shih, Yong-Siang and
               Geiping, Jonas},
  editor    = {Ojha, Atul Kr. and
               Do{\u{g}}ru{\"o}z, A. Seza and
               Da San Martino, Giovanni and
               Tayyar Madabushi, Harish and
               Kumar, Ritesh and
               Sartori, Elisa},
  booktitle = {Proceedings of the 17th International Workshop on Semantic Evaluation ({SemEval}-2023)},
  month     = jul,
  year      = {2023},
  address   = {Toronto, Canada},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.semeval-1.5/},
  doi       = {10.18653/v1/2023.semeval-1.5},
  pages     = {44--49},
  abstract  = {This paper describes our zero-shot approaches for the Visual Word Sense Disambiguation (VWSD) Task in English. Our preliminary study shows that the simple approach of matching candidate images with the phrase using CLIP suffers from the many-to-many nature of image-text pairs. We find that the CLIP text encoder may have limited abilities in capturing the compositionality in natural language. Conversely, the descriptive focus of the phrase varies from instance to instance. We address these issues in our two systems, Augment-CLIP and Stable Diffusion Sampling (SD Sampling). Augment-CLIP augments the text prompt by generating sentences that contain the context phrase with the help of large language models (LLMs). We further explore CLIP models in other languages, as the an ambiguous word may be translated into an unambiguous one in the other language. SD Sampling uses text-to-image Stable Diffusion to generate multiple images from the given phrase, increasing the likelihood that a subset of images match the one that paired with the text.},
}
Markdown (Informal)
[Augmenters at SemEval-2023 Task 1: Enhancing CLIP in Handling Compositionality and Ambiguity for Zero-Shot Visual WSD through Prompt Augmentation and Text-To-Image Diffusion](https://aclanthology.org/2023.semeval-1.5/) (Li et al., SemEval 2023)
ACL