% EACL 2026 long paper introducing the S-MedQA dataset (see abstract below).
% NOTE(review): `url` points at a preview.aclanthology.org ingestion host, which
%   looks like a staging address -- confirm the permanent URL (and a DOI, if one
%   has been assigned) before citing.
% NOTE(review): the third editor's surname is spelled without an accent here;
%   verify against the proceedings front matter.
@inproceedings{yan-etal-2026-infect,
  title     = {What Does Infect Mean to Cardio? Investigating the Role of Clinical Specialty Data in Medical {LLMs}},
  author    = {Yan, Xinlan and
               Wu, Di and
               Lei, Yibin and
               Monz, Christof and
               Calixto, Iacer},
  editor    = {Demberg, Vera and
               Inui, Kentaro and
               Marquez, Llu{\'i}s},
  booktitle = {Proceedings of the 19th Conference of the {European} Chapter of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-eacl/2026.eacl-long.390/},
  pages     = {8339--8358},
  isbn      = {979-8-89176-380-7},
  abstract  = {In this paper, we introduce S-MedQA, an English medical question-answering (QA) dataset designed for benchmarking large language models (LLMs) in fine-grained clinical specialties. S-MedQA consists of over 24k examples, covering 15 medical specialties, with QA pairs that can have multiple specialty annotations, such as when a question is cross-disciplinary. The dataset is constructed using both machine and expert verification to maximize data availability and reliability. We use S-MedQA to investigate the role of clinical specialties in the knowledge-intensive scenario of medical QA. Our results show that training on data from a clinical specialty does not necessarily lead to the best performance on that specialty. Additionally, regardless of the specialty the LLM was fine-tuned on, token probabilities of clinically relevant terms consistently increase across all specialties. Based on these findings, we hypothesize that improvement gains, at least in our settings, are derived primarily from domain shifting (e.g., general to medical) rather than from injecting specialty-specific knowledge. This suggests a need to rethink the role of fine-tuning data in the medical domain. To encourage further advancements in the clinical NLP field, we release S-MedQA along with all the code required to reproduce our experiments for the research community.},
}
Markdown (Informal)
[What Does Infect Mean to Cardio? Investigating the Role of Clinical Specialty Data in Medical LLMs](https://preview.aclanthology.org/ingest-eacl/2026.eacl-long.390/) (Yan et al., EACL 2026)
ACL