% FBK-NLP system description paper for the ClinSkill QA 2026 shared task,
% published in the BioNLP 2026 (Shared Tasks) proceedings.
% NOTE(review): `url` points at a preview.aclanthology.org ingest build --
%   confirm the final Anthology URL (and add a DOI if one exists) before release.
% NOTE(review): `address` presumably holds the conference venue rather than the
%   publisher's city, as is usual for ACL Anthology exports -- confirm.
@inproceedings{campana-etal-2026-fbk,
  title     = {{FBK-NLP} at {ClinSkill} {QA} 2026: Improving Temporal Reasoning via Keypoint-Augmented Inputs},
  author    = {Campana, Pedro Gabriel and
               Lavelli, Alberto and
               Magnini, Bernardo},
  editor    = {Gupta, Deepak and
               Demner-Fushman, Dina},
  booktitle = {Proceedings of the {BioNLP} 2026 (Shared Tasks)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.bionlp-2.14/},
  pages     = {92--98},
  isbn      = {979-8-89176-435-4},
  abstract  = {Understanding procedural skills from visual data is a key challenge in medical AI, especially for tasks that require reasoning over temporal sequences. We report on FBK-NLP{'}s participation at the ClinSkill QA 2026 shared task, which requires models to arrange shuffled key frames into a coherent sequence of clinical actions and provide explanations for the resulting order. We conduct a systematic study of prompting and reasoning strategies using an open and easily deployable vision-language model (VLM). The central finding of our study is that incorporating keypoint-based representations of people{'}s body parts substantially improves temporal reasoning behind frame ordering. Furthermore, we show that model performance is highly sensitive to prompt design and to seemingly minor factors such as filename ordering and the inclusion of domain information.},
}
Markdown (Informal)
[FBK-NLP at ClinSkill QA 2026: Improving Temporal Reasoning via Keypoint-Augmented Inputs](https://preview.aclanthology.org/ingest-acl-workshops/2026.bionlp-2.14/) (Campana et al., BioNLP 2026)
ACL