@comment{
  Workshop paper, SMM4H-HeaRD 2026 shared task (Task 4, SOAP note generation).
  Case-sensitive names in title and booktitle are brace-protected as whole
  words rather than letter by letter. The escaped hash sign in the title is
  kept outside the protecting braces on purpose: a top-level brace group that
  opens with a backslash is a BibTeX special character, and the letters inside
  it would still be lower-cased by sentence-casing styles.
  NOTE(review): the url field points at a preview.aclanthology.org ingest
  build; confirm the final address once the volume is published.
}
@inproceedings{santhosh-yu-2026-nu,
  author    = {Santhosh, Thanya Mysore and Yu, Deahan},
  title     = {{NU\_DeepHealthNLP} at \#{SMM4H-HeaRD} 2026: Entity-Conditioned Generation and a Four-Stage Pipeline for Automated {SOAP} Note Generation},
  editor    = {Lopez-Garcia, Guillermo and Gonzalez-Hernandez, Graciela},
  booktitle = {Proceedings of the 11th Social Media Mining for Health Research and Applications ({SMM4H-HeaRD} 2026) Workshop and Shared Tasks},
  month     = jul,
  year      = {2026},
  address   = {San Diego, United States},
  publisher = {Association for Computational Linguistics},
  pages     = {103--107},
  isbn      = {979-8-89176-432-3},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.smm4h-1.17/},
  abstract  = {We describe two system submissions to Task 4 of the SMM4H-HeaRD 2026 Shared Task on automated SOAP note generation from doctor{--}patient dialogues. Our first submission is a standalone entity-conditioned generation model: Mistral-7B-Instruct-v0.1 fine-tuned with QLoRA on 8,529 MedSynth training dialogues, where both training and inference prompts include clinical entities extracted and grouped by SOAP section. Our second submission is a four-stage modular pipeline that additionally incorporates a hybrid retrieval stage and a rule-based verification stage. The key finding of this work is that incorporating structured clinical domain knowledge, in the form of NER entities grouped by SOAP section, directly into the generation prompt produces consistent and reliable improvements over dialogue-only generation. Our four-stage pipeline submission achieved an average score of 0.54 on the official test set, ranking first on the shared task leaderboard.},
}
Markdown (Informal)
[NU_DeepHealthNLP at #SMM4H-HeaRD 2026: Entity-Conditioned Generation and a Four-Stage Pipeline for Automated SOAP Note Generation](https://preview.aclanthology.org/ingest-acl-workshops/2026.smm4h-1.17/) (Santhosh & Yu, SMM4H 2026)
ACL