% FESTA paper (Bhattacharya, Kulkarni, Ganapathy), Findings of EMNLP 2025.
% The url field uses the canonical ACL Anthology page for the paper ID that
% also appears in the doi, rather than a temporary preview-branch host.
% The address field holds the conference location, as exported by the Anthology.
@inproceedings{bhattacharya-etal-2025-festa,
  title     = {{FESTA}: Functionally Equivalent Sampling for Trust Assessment of Multimodal {LLMs}},
  author    = {Bhattacharya, Debarpan and
               Kulkarni, Apoorva and
               Ganapathy, Sriram},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2025},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  pages     = {12277--12295},
  doi       = {10.18653/v1/2025.findings-emnlp.657},
  url       = {https://aclanthology.org/2025.findings-emnlp.657/},
  isbn      = {979-8-89176-335-7},
  abstract  = {The accurate trust assessment of multimodal large language models (MLLMs) generated predictions, which can enable selective prediction and improve user confidence, is challenging due to the diverse multi-modal input paradigms. We propose Functionally Equivalent Sampling for Trust Assessment (FESTA), a multimodal input sampling technique for MLLMs, that generates an uncertainty measure based on the equivalent and complementary input samplings. The proposed task-preserving sampling approach for uncertainty quantification expands the input space to probe the consistency (through equivalent samples) and sensitivity (through complementary samples) of the model. FESTA uses only input-output access of the model (black-box), and does not require ground truth (unsupervised). The experiments are conducted with various off-the-shelf multi-modal LLMs, on both visual and audio reasoning tasks. The proposed FESTA uncertainty estimate achieves significant improvement (33.3{\%} relative improvement for vision-LLMs and 29.6{\%} relative improvement for audio-LLMs) in selective prediction performance, based on area-under-receiver-operating-characteristic curve (AUROC) metric in detecting mispredictions. The code implementation is open-sourced.},
}
Markdown (Informal)
[FESTA: Functionally Equivalent Sampling for Trust Assessment of Multimodal LLMs](https://aclanthology.org/2025.findings-emnlp.657/) (Bhattacharya et al., Findings 2025)
ACL