@inproceedings{keh-etal-2024-asking,
title = "Asking More Informative Questions for Grounded Retrieval",
author = "Keh, Sedrick and
Chiu, Justin and
Fried, Daniel",
editor = "Duh, Kevin and
Gomez, Helena and
Bethard, Steven",
booktitle = "Findings of the Association for Computational Linguistics: NAACL 2024",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/add-emnlp-2024-awards/2024.findings-naacl.276/",
doi = "10.18653/v1/2024.findings-naacl.276",
pages = "4429--4442",
abstract = "When a model is trying to gather information in an interactive setting, it benefits from asking informative questions. However, in the case of a grounded multi-turn image identification task, previous studies have been constrained to polar yes/no questions (White et al., 2021), limiting how much information the model can gain in a single turn. We present an approach that formulates more informative, open-ended questions. In doing so, we discover that off-the-shelf visual question answering (VQA) models often make presupposition errors, which standard information gain question selection methods fail to account for. To address this issue, we propose a method that can incorporate presupposition handling into both question selection and belief updates. Specifically, we use a two-stage process, where the model first filters out images which are irrelevant to a given question, then updates its beliefs about which image the user intends. Through self-play and human evaluations, we show that our method is successful in asking informative open-ended questions, increasing accuracy over the past state-of-the-art by 14{\%}, while resulting in 48{\%} more efficient games in human evaluations."
}
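
The abstract describes a two-stage procedure: select questions by expected information gain, then update beliefs by first filtering out images for which a question's presupposition fails and then reweighting the survivors by the answer likelihood. Below is a minimal, illustrative Python sketch of that idea, not the authors' implementation: `answer_model` (a stand-in for a VQA model returning a distribution over answers given an image) and `relevance_model` (a presupposition/relevance filter) are hypothetical callables, and all names are invented for illustration.

```python
import math

def expected_info_gain(question, belief, answer_model):
    """Expected information gain of a question under the current belief
    over candidate images. `belief` maps image -> probability;
    `answer_model(question, image)` returns a dict answer -> probability."""
    # Marginal answer distribution: P(a) = sum_i P(i) * P(a | q, i)
    marginal = {}
    for image, p_img in belief.items():
        for answer, p_ans in answer_model(question, image).items():
            marginal[answer] = marginal.get(answer, 0.0) + p_img * p_ans
    # Mutual information = H(answers) - expected conditional entropy
    h_marginal = -sum(p * math.log(p) for p in marginal.values() if p > 0)
    h_conditional = 0.0
    for image, p_img in belief.items():
        dist = answer_model(question, image)
        h_conditional -= p_img * sum(
            p * math.log(p) for p in dist.values() if p > 0)
    return h_marginal - h_conditional

def update_belief(belief, question, answer, relevance_model, answer_model):
    """Two-stage update: (1) filter out images for which the question's
    presupposition fails, (2) Bayesian reweighting of the survivors."""
    # Stage 1: drop images judged irrelevant to the question
    filtered = {img: p for img, p in belief.items()
                if relevance_model(question, img)}
    # Stage 2: reweight by P(answer | question, image);
    # a small floor avoids zeroing out images on a noisy VQA miss
    posterior = {img: p * answer_model(question, img).get(answer, 1e-9)
                 for img, p in filtered.items()}
    z = sum(posterior.values())
    # If everything was filtered out, fall back to the prior belief
    return {img: p / z for img, p in posterior.items()} if z > 0 else belief
```

At each turn, the asker would score candidate open-ended questions with `expected_info_gain`, pose the highest-scoring one, and pass the user's answer through `update_belief`; the stage-1 filter is what keeps presupposition failures from corrupting the Bayesian update in stage 2.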