% CMCL 2024 workshop paper, ACL Anthology ID 2024.cmcl-1.18.
% The url field uses the canonical Anthology landing page (not a preview/staging build);
% the doi field carries the same Anthology ID.
@inproceedings{tater-etal-2024-evaluating,
  title     = {Evaluating Semantic Relations in Predicting Textual Labels for Images of Abstract and Concrete Concepts},
  author    = {Tater, Tarun and
               Schulte Im Walde, Sabine and
               Frassinelli, Diego},
  editor    = {Kuribayashi, Tatsuki and
               Rambelli, Giulia and
               Takmaz, Ece and
               Wicke, Philipp and
               Oseki, Yohei},
  booktitle = {Proceedings of the Workshop on Cognitive Modeling and Computational Linguistics},
  month     = aug,
  year      = {2024},
  address   = {Bangkok, Thailand},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.cmcl-1.18/},
  doi       = {10.18653/v1/2024.cmcl-1.18},
  pages     = {214--220},
  abstract  = {This study investigates the performance of SigLIP, a state-of-the-art Vision-Language Model (VLM), in predicting labels for images depicting 1,278 concepts. Our analysis across 300 images per concept shows that the model frequently predicts the exact user-tagged labels, but similarly, it often predicts labels that are semantically related to the exact labels in various ways: synonyms, hypernyms, co-hyponyms, and associated words, particularly for abstract concepts. We then zoom into the diversity of the user tags of images and word associations for abstract versus concrete concepts. Surprisingly, not only abstract but also concrete concepts exhibit significant variability, thus challenging the traditional view that representations of concrete concepts are less diverse.},
}
Markdown (Informal)
[Evaluating Semantic Relations in Predicting Textual Labels for Images of Abstract and Concrete Concepts](https://aclanthology.org/2024.cmcl-1.18/) (Tater et al., CMCL 2024)
ACL