% EHR-SeqSQL paper, Findings of ACL 2024 (Anthology ID 2024.findings-acl.971).
% The url field uses the canonical aclanthology.org address rather than a
% preview-branch build, which is not a stable location.
% The dataset name in the title is brace-protected as one whole word so that
% sentence-casing styles keep its capitalisation.
@inproceedings{ryu-etal-2024-ehr,
    title = "{EHR-SeqSQL} : A Sequential Text-to-{SQL} Dataset For Interactively Exploring Electronic Health Records",
    author = "Ryu, Jaehee and
      Cho, Seonhee and
      Lee, Gyubok and
      Choi, Edward",
    editor = "Ku, Lun-Wei and
      Martins, Andre and
      Srikumar, Vivek",
    booktitle = "Findings of the Association for Computational Linguistics: ACL 2024",
    month = aug,
    year = "2024",
    address = "Bangkok, Thailand",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.findings-acl.971/",
    doi = "10.18653/v1/2024.findings-acl.971",
    pages = "16388--16407",
    abstract = "In this paper, we introduce EHR-SeqSQL, a novel sequential text-to-SQL dataset for Electronic Health Record (EHR) databases. EHR-SeqSQL is designed to address critical yet underexplored aspects in text-to-SQL parsing: interactivity, compositionality, and efficiency. To the best of our knowledge, EHR-SeqSQL is not only the largest but also the first medical text-to-SQL dataset benchmark to include sequential and contextual questions. We provide a data split and the new test set designed to assess compositional generalization ability. Our experiments demonstrate the superiority of a multi-turn approach over a single-turn approach in learning compositionality. Additionally, our dataset integrates specially crafted tokens into SQL queries to improve execution efficiency. With EHR-SeqSQL, we aim to bridge the gap between practical needs and academic research in the text-to-SQL domain."
}
Markdown (Informal)
[EHR-SeqSQL : A Sequential Text-to-SQL Dataset For Interactively Exploring Electronic Health Records](https://aclanthology.org/2024.findings-acl.971/) (Ryu et al., Findings 2024)
ACL