% emrQA (Pampari et al., EMNLP 2018). The url field uses the canonical ACL
% Anthology address for paper D18-1258 (same ID as the DOI suffix), not a
% preview-branch build that can disappear.
@inproceedings{pampari-etal-2018-emrqa,
  title     = {{emrQA}: A Large Corpus for Question Answering on Electronic Medical Records},
  author    = {Pampari, Anusri and
               Raghavan, Preethi and
               Liang, Jennifer and
               Peng, Jian},
  editor    = {Riloff, Ellen and
               Chiang, David and
               Hockenmaier, Julia and
               Tsujii, Jun{'}ichi},
  booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing},
  month     = oct # "--" # nov,
  year      = {2018},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/D18-1258/},
  doi       = {10.18653/v1/D18-1258},
  pages     = {2357--2368},
  abstract  = {We propose a novel methodology to generate domain-specific large-scale question answering (QA) datasets by re-purposing existing annotations for other NLP tasks. We demonstrate an instance of this methodology in generating a large-scale QA dataset for electronic medical records by leveraging existing expert annotations on clinical notes for various NLP tasks from the community shared i2b2 datasets. The resulting corpus (emrQA) has 1 million questions-logical form and 400,000+ question-answer evidence pairs. We characterize the dataset and explore its learning potential by training baseline models for question to logical form and question to answer mapping.},
}
Markdown (Informal)
[emrQA: A Large Corpus for Question Answering on Electronic Medical Records](https://aclanthology.org/D18-1258/) (Pampari et al., EMNLP 2018)
ACL