@comment{Workshop paper in the SDP 2024 proceedings, exported from the ACL Anthology.
  NOTE(review): the url field points at a preview.aclanthology.org staging branch;
  confirm the canonical Anthology URL (and add a doi if one exists) before relying on it.
  NOTE(review): editor Waard, Anita may be missing a name particle; verify against the proceedings front matter.}
@inproceedings{kumar-wang-2024-harnessing,
  title     = {Harnessing {CLIP} for Evidence Identification in Scientific Literature: A Multimodal Approach to {Context24} Shared Task},
  author    = {Kumar, Anukriti and
               Wang, Lucy},
  editor    = {Ghosal, Tirthankar and
               Singh, Amanpreet and
               Waard, Anita and
               Mayr, Philipp and
               Naik, Aakanksha and
               Weller, Orion and
               Lee, Yoonjoo and
               Shen, Shannon and
               Qin, Yanxia},
  booktitle = {Proceedings of the Fourth Workshop on Scholarly Document Processing (SDP 2024)},
  month     = aug,
  year      = {2024},
  address   = {Bangkok, Thailand},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/jlcl-multiple-ingestion/2024.sdp-1.29/},
  pages     = {307--313},
  abstract  = {Knowing whether scientific claims are supported by evidence is fundamental to scholarly communication and evidence-based decision-making. We present our approach to Task 1 of the Context24 Shared Task{---}Contextualizing Scientific Figures and Tables (SDP@ACL2024), which focuses on identifying multimodal evidence from scientific publications that support claims. We finetune CLIP, a state-of-the-art model for image-text similarity tasks, to identify and rank figures and tables in papers that substantiate specific claims. Our methods focus on text and image preprocessing techniques and augmenting the organizer-provided training data with labeled examples from the SciMMIR and MedICaT datasets. Our best-performing model achieved NDCG@5 and NDCG@10 values of 0.26 and 0.30, respectively, on the Context24 test split. Our findings underscore the effectiveness of data augmentation and preprocessing in improving the model's ability in evidence matching.},
}
Markdown (Informal)
[Harnessing CLIP for Evidence Identification in Scientific Literature: A Multimodal Approach to Context24 Shared Task](https://preview.aclanthology.org/jlcl-multiple-ingestion/2024.sdp-1.29/) (Kumar & Wang, SDP 2024)
ACL