% Workshop paper (BioNLP 2020). The url field is the canonical ACL Anthology
% landing page; the doi field is the bare DOI without a resolver prefix.
@inproceedings{searle-etal-2020-experimental,
  title     = {Experimental Evaluation and Development of a Silver-Standard for the {MIMIC}-{III} Clinical Coding Dataset},
  author    = {Searle, Thomas and
               Ibrahim, Zina and
               Dobson, Richard},
  editor    = {Demner-Fushman, Dina and
               Cohen, Kevin Bretonnel and
               Ananiadou, Sophia and
               Tsujii, Junichi},
  booktitle = {Proceedings of the 19th {SIGBioMed} Workshop on Biomedical Language Processing},
  month     = jul,
  year      = {2020},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2020.bionlp-1.8/},
  doi       = {10.18653/v1/2020.bionlp-1.8},
  pages     = {76--85},
  abstract  = {Clinical coding is currently a labour-intensive, error-prone, but a critical administrative process whereby hospital patient episodes are manually assigned codes by qualified staff from large, standardised taxonomic hierarchies of codes. Automating clinical coding has a long history in NLP research and has recently seen novel developments setting new benchmark results. A popular dataset used in this task is MIMIC-III, a large database of clinical free text notes and their associated codes amongst other data. We argue for the reconsideration of the validity MIMIC-III's assigned codes, as MIMIC-III has not undergone secondary validation. This work presents an open-source, reproducible experimental methodology for assessing the validity of EHR discharge summaries. We exemplify the methodology with MIMIC-III discharge summaries and show the most frequently assigned codes in MIMIC-III are undercoded up to 35{\%}.},
}
Markdown (Informal)
[Experimental Evaluation and Development of a Silver-Standard for the MIMIC-III Clinical Coding Dataset](https://aclanthology.org/2020.bionlp-1.8/) (Searle et al., BioNLP 2020)
ACL