@inproceedings{merenda-etal-2026-llms,
    title     = "Can {LLM}s Reason Like Doctors? Exploring the Limits of Large Language Models in Complex Medical Reasoning",
    author    = "Merenda, Flavio and
      Gomez-Perez, Jose Manuel and
      Rigau, German",
    editor    = "Demberg, Vera and
      Inui, Kentaro and
      Marquez, Llu{\'i}s",
    booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {EACL} 2026",
    month     = mar,
    year      = "2026",
    address   = "Rabat, Morocco",
    publisher = "Association for Computational Linguistics",
    url       = "https://preview.aclanthology.org/ingest-eacl/2026.findings-eacl.127/",
    pages     = "2432--2452",
    isbn      = "979-8-89176-386-9",
    abstract  = "Large language models (LLMs) have shown remarkable progress in reasoning across multiple domains. However, it remains unclear whether their abilities reflect genuine reasoning or sophisticated pattern matching, a distinction critical in medical decision-making, where reliable multi-step problem-solving is required. Accordingly, we conduct one of the largest evaluations to date, assessing 77 LLMs with diverse fine-tuning approaches, ranging from 1 billion parameters to frontier models. Guided by medical problem-solving theory, we select three medical question answering (QA) benchmarks targeting key reasoning skills: reasoning processes, susceptibility to cognitive biases, and metacognitive abilities. Additionally, we manually annotate a subset of questions to assess the abduction, deduction, and induction capabilities of LLMs, offering detailed insight into the reasoning mechanisms followed by physicians, an aspect that has received relatively limited attention in this domain. Most models, particularly smaller ones, struggle even with specialized fine-tuning or advanced prompting. Larger models perform better but still show clear limitations in complex medical reasoning. Our findings highlight the need to improve specific reasoning strategies to better reflect medical decision-making. The datasets and code used in this study are publicly available at: \url{https://github.com/expertailab/Can-LLMs-Reason-Like-Doctors}",
}
Markdown (Informal)
[Can LLMs Reason Like Doctors? Exploring the Limits of Large Language Models in Complex Medical Reasoning](https://preview.aclanthology.org/ingest-eacl/2026.findings-eacl.127/) (Merenda et al., Findings 2026)
ACL