% Overview paper for the ClinIQLink 2025 shared task (medical question answering),
% collocated with the 24th BioNLP workshop at ACL 2025.
% NOTE(review): the url field points at an ACL Anthology preview (ingestion) link;
% confirm and switch to the canonical URL/DOI once the volume is published.
@inproceedings{colelough-etal-2025-overview,
  title     = {Overview of the {ClinIQLink} 2025 Shared Task on Medical Question-Answering},
  author    = {Colelough, Brandon and
               Bartels, Davis and
               Demner-Fushman, Dina},
  editor    = {Demner-Fushman, Dina and
               Ananiadou, Sophia and
               Miwa, Makoto and
               Tsujii, Junichi},
  booktitle = {{ACL} 2025},
  month     = aug,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/acl25-workshop-ingestion/2025.bionlp-1.32/},
  pages     = {378--387},
  isbn      = {979-8-89176-275-6},
  abstract  = {In this paper, we present an overview of CLINIQLINK a shared task, collocated with the 24th BioNLP workshop at ACL 2025, designed to stress-test large language models (LLMs) on medically-oriented question answering aimed at the level of a General Practitioner. The challenge supplies 4 978 expert-verified, medical source-grounded question{--}answer pairs that cover seven formats - true/false, multiple choice, unordered list, short answer, short-inverse, multi-hop, and multi-hop-inverse. Participating systems, bundled in Docker or Apptainer images, are executed on the CodaBench platform or the University of Maryland{'}s Zaratan cluster. An automated harness (Task 1) scores closed-ended items by exact match and open-ended items with a three-tier embedding metric. A subsequent physician panel (Task 2) audits the top model responses.},
}
Markdown (Informal)
[Overview of the ClinIQLink 2025 Shared Task on Medical Question-Answering](https://preview.aclanthology.org/acl25-workshop-ingestion/2025.bionlp-1.32/) (Colelough et al., BioNLP 2025)
ACL