% Conference paper from the ICON 2024 proceedings (ACL Anthology ID 2024.icon-1.3).
% NOTE(review): the abstract below appears to be cut off mid-sentence in this
% record ("... using a dynamic triple extraction"); restore the remainder from
% the Anthology page if the full abstract is needed.
@inproceedings{jhalani-etal-2024-precision,
    title     = {Precision Empowers, Excess Distracts: Visual Question Answering With Dynamically Infused Knowledge In Language Models},
    author    = {Jhalani, Manas and
                 K M, Annervaz and
                 Bhattacharyya, Pushpak},
    editor    = {Lalitha Devi, Sobha and
                 Arora, Karunesh},
    booktitle = {Proceedings of the 21st International Conference on Natural Language Processing (ICON)},
    month     = dec,
    year      = {2024},
    address   = {AU-KBC Research Centre, Chennai, India},
    publisher = {NLP Association of India (NLPAI)},
    url       = {https://aclanthology.org/2024.icon-1.3/},
    pages     = {21--36},
    abstract  = {In the realm of multimodal tasks, Visual Question Answering (VQA) plays a crucial role by addressing natural language questions grounded in visual content. Knowledge-Based Visual Question Answering (KBVQA) advances this concept by adding external knowledge along with images to respond to questions. We introduce an approach for KBVQA, augmenting the existing vision-language transformer encoder-decoder (OFA) model. Our main contribution involves enhancing questions by incorporating relevant external knowledge extracted from knowledge graphs, using a dynamic triple extraction},
}
Markdown (Informal)
[Precision Empowers, Excess Distracts: Visual Question Answering With Dynamically Infused Knowledge In Language Models](https://aclanthology.org/2024.icon-1.3/) (Jhalani et al., ICON 2024)
ACL