@inproceedings{chandrasegaran-htait-2026-enhancing,
    title = "Enhancing Clinical Trial Analysis through Large Language Models for Multi-Evidence Natural Language Inference",
    author = "Chandrasegaran, Shobanapriyan and
      Htait, Amal",
    editor = "Piperidis, Stelios and
      Bel, N{\'u}ria and
      van den Heuvel, Henk and
      Ide, Nancy and
      Krek, Simon and
      Toral, Antonio",
    booktitle = "Proceedings of the International Conference on Language Resources and Evaluation",
    month = may,
    year = "2026",
    address = "Palma de Mallorca, Spain",
    publisher = "ELRA Language Resources Association",
    url = "https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.36/",
    pages = "515--523",
    abstract = "The exponential growth of clinical trial reports (CTRs) presents a critical challenge for evidence-based medicine, with manual systematic reviews requiring months to synthesise findings. This paper evaluates Large Language Models (LLMs) and retrieval methods for automated Natural Language Inference (NLI) and evidence extraction from CTRs, and seeks to improve upon previously reported results in this domain. Using the NLI4CT dataset containing 2,400 annotated statement-evidence pairs from breast cancer trials, we conducted a comparative evaluation of general-purpose LLMs, domain-specific LLMs, and transformer-based baselines across entailment classification and evidence retrieval tasks. Reasoning-capable, general-purpose LLMs (such as Qwen-32B) demonstrated superior performance in the entailment classification task, exceeding both the performance of other models evaluated in this study and the previously reported state-of-the-art results. Although domain-specific adaptations showed improvements at comparable scale, larger general-purpose language models maintained superior absolute performance. For evidence retrieval, Large Language embedding models (such as bge-large-en-v1.5) surpassed classic transformer-based ranking approaches. These findings demonstrate that modern LLMs with reasoning capabilities can effectively support real-time clinical evidence synthesis without task-specific fine-tuning, offering a pathway toward scalable automated systems for clinical trial interpretation that could substantially reduce the evidence-to-practice gap in medical decision-making."
}
@comment{Markdown (Informal)}
@comment{
[Enhancing Clinical Trial Analysis through Large Language Models for Multi-Evidence Natural Language Inference](https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.36/) (Chandrasegaran & Htait, LREC 2026)
ACL
}