% Lu et al., EMNLP 2018 (ACL Anthology ID D18-1435).
% url is the permanent Anthology address for this ID; month concatenates the
% built-in oct and nov macros into a range joined by an en-dash.
@inproceedings{lu-etal-2018-entity,
  title     = {Entity-aware Image Caption Generation},
  author    = {Lu, Di and
               Whitehead, Spencer and
               Huang, Lifu and
               Ji, Heng and
               Chang, Shih-Fu},
  editor    = {Riloff, Ellen and
               Chiang, David and
               Hockenmaier, Julia and
               Tsujii, Jun{'}ichi},
  booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing},
  month     = oct # "--" # nov,
  year      = {2018},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/D18-1435/},
  doi       = {10.18653/v1/D18-1435},
  pages     = {4013--4023},
  abstract  = {Current image captioning approaches generate descriptions which lack specific information, such as named entities that are involved in the images. In this paper we propose a new task which aims to generate informative image captions, given images and hashtags as input. We propose a simple but effective approach to tackle this problem. We first train a convolutional neural networks - long short term memory networks (CNN-LSTM) model to generate a template caption based on the input image. Then we use a knowledge graph based collective inference algorithm to fill in the template with specific named entities retrieved via the hashtags. Experiments on a new benchmark dataset collected from Flickr show that our model generates news-style image descriptions with much richer information. Our model outperforms unimodal baselines significantly with various evaluation metrics.},
}
Markdown (Informal)
[Entity-aware Image Caption Generation](https://aclanthology.org/D18-1435/) (Lu et al., EMNLP 2018)
ACL
- Di Lu, Spencer Whitehead, Lifu Huang, Heng Ji, and Shih-Fu Chang. 2018. Entity-aware Image Caption Generation. In Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, pages 4013–4023, Brussels, Belgium. Association for Computational Linguistics.