% Alikhani and Stone (2019), workshop paper, ACL Anthology ID W19-1806.
% The url field holds the canonical aclanthology.org address (not a preview-branch mirror).
% {Caption} is brace-protected so that sentence-casing styles keep its capital letter.
@inproceedings{alikhani-stone-2019-caption,
  title     = {{\textquotedblleft}{Caption}{\textquotedblright} as a Coherence Relation: Evidence and Implications},
  author    = {Alikhani, Malihe and
               Stone, Matthew},
  editor    = {Bernardi, Raffaella and
               Fernandez, Raquel and
               Gella, Spandana and
               Kafle, Kushal and
               Kanan, Christopher and
               Lee, Stefan and
               Nabi, Moin},
  booktitle = {Proceedings of the Second Workshop on Shortcomings in Vision and Language},
  month     = jun,
  year      = {2019},
  address   = {Minneapolis, Minnesota},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/W19-1806/},
  doi       = {10.18653/v1/W19-1806},
  pages     = {58--67},
  abstract  = {We study verbs in image{--}text corpora, contrasting \textit{caption} corpora, where texts are explicitly written to characterize image content, with \textit{depiction} corpora, where texts and images may stand in more general relations. Captions show a distinctively limited distribution of verbs, with strong preferences for specific tense, aspect, lexical aspect, and semantic field. These limitations, which appear in data elicited by a range of methods, restrict the utility of caption corpora to inform image retrieval, multimodal document generation, and perceptually-grounded semantic models. We suggest that these limitations reflect the discourse constraints in play when subjects write texts to accompany imagery, so we argue that future development of image{--}text corpora should work to increase the diversity of event descriptions, while looking explicitly at the different ways text and imagery can be coherently related.},
}
Markdown (Informal)
[“Caption” as a Coherence Relation: Evidence and Implications](https://aclanthology.org/W19-1806/) (Alikhani & Stone, NAACL 2019)
ACL