% DualAlign paper, Findings of ACL 2026 (conference paper record).
% NOTE(review): the second author is stored as the single token "XWang", which
%   BibTeX treats as a surname with no given name. It looks like a fused
%   initial + surname; confirm against the paper before rewriting it.
% NOTE(review): the url field points at a preview/ingest host; replace it with
%   the permanent link (or a DOI) once one is available.
@inproceedings{li-etal-2026-dualalign,
  title     = {{DualAlign}: Generating Clinically Grounded Synthetic Data},
  author    = {Li, Rumeng and
               XWang and
               Yu, Hong},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1405/},
  pages     = {28189--28208},
  isbn      = {979-8-89176-395-1},
  abstract  = {Synthetic clinical data are essential for advancing AI in healthcare, given strict privacy constraints on electronic health records (EHRs), the scarcity of annotated data for rare or slowly progressing conditions, and demographic biases in observational cohorts. Large language models (LLMs) can generate fluent clinical text, but ensuring that such outputs are both clinically grounded and useful for downstream modeling remains challenging. We present DualAlign, a disease-agnostic framework for generating privacy-preserving, clinically faithful synthetic EHR narratives. DualAlign improves generation fidelity through two complementary alignment mechanisms: persona alignment, which conditions generation on patient demographics and risk factors, and symptom-trajectory alignment, which grounds narratives in empirically observed longitudinal symptom patterns. Using Alzheimer{'}s disease (AD) as a case study, DualAlign produces context-aware, symptom-rich sentences that more closely reflect real-world clinical documentation. Augmenting limited gold-standard data with DualAlign substantially improves AD symptom classification, outperforming both gold-only training and unconstrained synthetic baselines. Overall, DualAlign provides a generalizable approach for generating high-utility synthetic clinical text in chronic and progressive diseases, reducing annotation burden while enabling scalable and privacy-conscious clinical NLP research.},
}
Markdown (Informal)
[DualAlign: Generating Clinically Grounded Synthetic Data](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1405/) (Li et al., Findings 2026)
ACL