@inproceedings{reich-etal-2023-measuring,
    title = "Measuring Faithful and Plausible Visual Grounding in {VQA}",
    author = "Reich, Daniel and
      Putze, Felix and
      Schultz, Tanja",
    editor = "Bouamor, Houda and
      Pino, Juan and
      Bali, Kalika",
    booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
    month = dec,
    year = "2023",
    address = "Singapore",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2023.findings-emnlp.206/",
    doi = "10.18653/v1/2023.findings-emnlp.206",
    pages = "3129--3144",
    abstract = "Metrics for Visual Grounding (VG) in Visual Question Answering (VQA) systems primarily aim to measure a system{'}s reliance on relevant parts of the image when inferring an answer to the given question. Lack of VG has been a common problem among state-of-the-art VQA systems and can manifest in over-reliance on irrelevant image parts or a disregard for the visual modality entirely. Although inference capabilities of VQA models are often illustrated by a few qualitative illustrations, most systems are not quantitatively assessed for their VG properties. We believe, an easily calculated criterion for meaningfully measuring a system{'}s VG can help remedy this shortcoming, as well as add another valuable dimension to model evaluations and analysis. To this end, we propose a new VG metric that captures if a model a) identifies question-relevant objects in the scene, and b) actually relies on the information contained in the relevant objects when producing its answer, i.e., if its visual grounding is both ``faithful'' and ``plausible''. Our metric, called Faithful {\&} Plausible Visual Grounding (FPVG), is straightforward to determine for most VQA model designs. We give a detailed description of FPVG and evaluate several reference systems spanning various VQA architectures. Code to support the metric calculations on the GQA data set is available on GitHub."
}
Markdown (Informal)
[Measuring Faithful and Plausible Visual Grounding in VQA](https://aclanthology.org/2023.findings-emnlp.206/) (Reich et al., Findings 2023)
ACL