% Conference paper (LREC 2026), therefore typed as inproceedings with the
% venue name in booktitle so that the editor list is actually used by styles.
% The source record carried volume = "main"; that appears to be the anthology
% collection label (see the "lrec-main" part of the URL), not a numbered
% volume, so it is recorded here instead of in a rendered field.
% NOTE(review): the url points at a preview/ingest host; confirm the final
% canonical address once the volume is published.
@inproceedings{chen-vossen-2026-cheap,
  title     = {A Cheap Lunch: Synthetic Annotation With Reduced Human Effort for Medical Text Mining},
  author    = {Chen, Shutao and
               Vossen, Piek T. J. M.},
  editor    = {Piperidis, Stelios and
               Bel, N{\'u}ria and
               van den Heuvel, Henk and
               Ide, Nancy and
               Krek, Simon and
               Toral, Antonio},
  booktitle = {International Conference on Language Resources and Evaluation},
  month     = may,
  year      = {2026},
  address   = {Palma de Mallorca, Spain},
  publisher = {ELRA Language Resource Association},
  url       = {https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.813/},
  pages     = {10353--10364},
  abstract  = {Electronic Health Records are rich resources of patient knowledge and information among which knowledge about the functioning of patients as defined in the International Classification of Functioning (ICF) by the WHO. However, the patient notes have yet to be explored as the knowledge is packaged in sometimes cryptic language exchanged between caretakers. Recent research started to use NLP techniques to extract this knowledge but often requires laborious annotation. In this paper, we report on how the annotation can (partly) be done by a generative LLM, both for ICF categories that were previously manually annotated and for new ICF categories for which there was no annotation. We show that a domain specific encoder finetuned with both manual and synthetic annotations outperforms finetuning with just the manual annotations on a dedicated test set that was adapted for the new categories with minimal manual effort. We also assessed the quality of the synthetic annotations of the training data. Our process shows how competitive text classifiers for medical text mining can be developed and extended to new categories with minimal manual effort by experts.},
}
Markdown (Informal)
[A Cheap Lunch: Synthetic Annotation With Reduced Human Effort for Medical Text Mining](https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.813/) (Chen & Vossen, LREC 2026)
ACL