@inproceedings{sharma-etal-2018-conceptual,
title = "Conceptual Captions: A Cleaned, Hypernymed, Image Alt-text Dataset For Automatic Image Captioning",
author = "Sharma, Piyush and
Ding, Nan and
Goodman, Sebastian and
Soricut, Radu",
editor = "Gurevych, Iryna and
Miyao, Yusuke",
booktitle = "Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2018",
address = "Melbourne, Australia",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/P18-1238/",
doi = "10.18653/v1/P18-1238",
pages = "2556--2565",
abstract = "We present a new dataset of image caption annotations, Conceptual Captions, which contains an order of magnitude more images than the MS-COCO dataset (Lin et al., 2014) and represents a wider variety of both images and image caption styles. We achieve this by extracting and filtering image caption annotations from billions of webpages. We also present quantitative evaluations of a number of image captioning models and show that a model architecture based on Inception-ResNetv2 (Szegedy et al., 2016) for image-feature extraction and Transformer (Vaswani et al., 2017) for sequence modeling achieves the best performance when trained on the Conceptual Captions dataset."
}
Markdown (Informal)
[Conceptual Captions: A Cleaned, Hypernymed, Image Alt-text Dataset For Automatic Image Captioning](https://preview.aclanthology.org/fix-sig-urls/P18-1238/) (Sharma et al., ACL 2018)
ACL