@comment{Workshop paper (ClinicalNLP 2024, MEDIQA-CORR shared task). The url field uses the canonical ACL Anthology permalink that matches the Anthology ID in the doi field.}
@inproceedings{hwang-etal-2024-ku,
  title     = {{KU-DMIS} at {MEDIQA-CORR} 2024: Exploring the Reasoning Capabilities of Small Language Models in Medical Error Correction},
  author    = {Hwang, Hyeon and
               Lee, Taewhoo and
               Kim, Hyunjae and
               Kang, Jaewoo},
  editor    = {Naumann, Tristan and
               Ben Abacha, Asma and
               Bethard, Steven and
               Roberts, Kirk and
               Bitterman, Danielle},
  booktitle = {Proceedings of the 6th Clinical Natural Language Processing Workshop},
  month     = jun,
  year      = {2024},
  address   = {Mexico City, Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.clinicalnlp-1.51/},
  doi       = {10.18653/v1/2024.clinicalnlp-1.51},
  pages     = {526--536},
  abstract  = {Recent advancements in large language models (LM) like OpenAI's GPT-4 have shown promise in healthcare, particularly in medical question answering and clinical applications. However, their deployment raises privacy concerns and their size limits use in resource-constrained environments. Smaller open-source LMs have emerged as alternatives, but their reliability in medicine remains underexplored. This study evaluates small LMs in the medical field using the MEDIQA-CORR 2024 task, which assesses the ability of models to identify and correct errors in clinical notes. Initially, zero-shot inference and simple fine-tuning of small models resulted in poor performance. When fine-tuning with chain-of-thought (CoT) reasoning using synthetic data generated by GPT-4, their performance significantly improved. Meerkat-7B, a small LM trained with medical CoT reasoning, demonstrated notable performance gains. Our model outperforms other small non-commercial LMs and some larger models, achieving a 73.36 aggregate score on MEDIQA-CORR 2024.},
}
Markdown (Informal)
[KU-DMIS at MEDIQA-CORR 2024: Exploring the Reasoning Capabilities of Small Language Models in Medical Error Correction](https://aclanthology.org/2024.clinicalnlp-1.51/) (Hwang et al., ClinicalNLP 2024)
ACL