% Ilinykh and Dobnik, INLG 2025 (ACL Anthology export).
% The url field uses the canonical Anthology address rather than a per-branch
% preview build, and the editor names are stored as UTF-8 throughout.
@inproceedings{ilinykh-dobnik-2025-surprisal,
  title     = {Surprisal reveals diversity gaps in image captioning and different scorers change the story},
  author    = {Ilinykh, Nikolai and
               Dobnik, Simon},
  editor    = {Flek, Lucie and
               Narayan, Shashi and
               Phương, Lê Hồng and
               Pei, Jiahuan},
  booktitle = {Proceedings of the 18th International Natural Language Generation Conference},
  month     = oct,
  year      = {2025},
  address   = {Hanoi, Vietnam},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.inlg-main.22/},
  pages     = {366--375},
  abstract  = {We quantify linguistic diversity in image captioning with surprisal variance {--} the spread of token-level negative log-probabilities within a caption set. On the MSCOCO test set, we compare five state-of-the-art vision-and-language LLMs, decoded with greedy and nucleus sampling, to human captions. Measured with a caption-trained n-gram LM, humans display roughly twice the surprisal variance of models, but rescoring the same captions with a general-language model reverses the pattern. Our analysis introduces the surprisal-based diversity metric for image captioning. We show that relying on a single scorer can completely invert conclusions, thus, robust diversity evaluation must report surprisal under several scorers.},
}
Markdown (Informal)
[Surprisal reveals diversity gaps in image captioning and different scorers change the story](https://aclanthology.org/2025.inlg-main.22/) (Ilinykh & Dobnik, INLG 2025)
ACL