@comment{ACL 2018 long paper, Anthology ID P18-1085. The url field uses the canonical ACL Anthology address instead of a temporary preview-branch link.}
@inproceedings{kiros-etal-2018-illustrative,
    title = "Illustrative Language Understanding: Large-Scale Visual Grounding with Image Search",
    author = "Kiros, Jamie and
      Chan, William and
      Hinton, Geoffrey",
    editor = "Gurevych, Iryna and
      Miyao, Yusuke",
    booktitle = "Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
    month = jul,
    year = "2018",
    address = "Melbourne, Australia",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/P18-1085/",
    doi = "10.18653/v1/P18-1085",
    pages = "922--933",
    abstract = "We introduce Picturebook, a large-scale lookup operation to ground language via {\textquoteleft}snapshots' of our physical world accessed through image search. For each word in a vocabulary, we extract the top-$k$ images from Google image search and feed the images through a convolutional network to extract a word embedding. We introduce a multimodal gating function to fuse our Picturebook embeddings with other word representations. We also introduce Inverse Picturebook, a mechanism to map a Picturebook embedding back into words. We experiment and report results across a wide range of tasks: word similarity, natural language inference, semantic relatedness, sentiment/topic classification, image-sentence ranking and machine translation. We also show that gate activations corresponding to Picturebook embeddings are highly correlated to human judgments of concreteness ratings."
}
Markdown (Informal)
[Illustrative Language Understanding: Large-Scale Visual Grounding with Image Search](https://aclanthology.org/P18-1085/) (Kiros et al., ACL 2018)
ACL