@comment{
  Conference paper: synthetic Norwegian clinical corpus for de-identification (NoDaLiDa 2021).
  NOTE(review): the url field below points at the preview.aclanthology.org host
  (path segment jlcl-multiple-ingestion), which looks like a staging copy rather
  than a permanent address -- confirm and switch to the canonical link.
  The abstract had PDF line-break hyphenation artifacts, repaired here.
}
@inproceedings{brathen-etal-2021-creating,
  title     = {Creating and Evaluating a Synthetic {Norwegian} Clinical Corpus for De-Identification},
  author    = {Br{\r{a}}then, Synn{\o}ve and
               Wie, Wilhelm and
               Dalianis, Hercules},
  editor    = {Dobnik, Simon and
               {\O}vrelid, Lilja},
  booktitle = {Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa)},
  month     = may # " 31--2 " # jun,
  year      = {2021},
  address   = {Reykjavik, Iceland (Online)},
  publisher = {Link{\"o}ping University Electronic Press, Sweden},
  url       = {https://preview.aclanthology.org/jlcl-multiple-ingestion/2021.nodalida-main.22/},
  pages     = {222--230},
  abstract  = {Building tools to remove sensitive information such as personal names, addresses, and telephone numbers - so called Protected Health Information (PHI) - from clinical free text is an important task to make clinical texts available for research. These de-identification tools must be assessed regarding their quality in the form of the measurements precision and recall. To assess such tools, gold standards - annotated clinical text - must be available. Such gold standards exist for larger languages. For Norwegian, however, there are no such resources. Therefore, an already existing Norwegian synthetic clinical corpus, NorSynthClinical, has been extended with PHIs and annotated by two annotators, obtaining an inter-annotator agreement of 0.94 F1-measure. In total, the corpus has 409 annotated PHI instances and is called NorSynthClinical PHI. A de-identification hybrid tool (machine learning and rule-based methods) for Norwegian was developed and trained with open available resources, and obtained an overall F1-measure of 0.73 and a recall of 0.62, when tested using NorSynthClinical PHI. NorSynthClinical PHI is made open and available at Github to be used by the research community.},
}
Markdown (Informal)
[Creating and Evaluating a Synthetic Norwegian Clinical Corpus for De-Identification](https://preview.aclanthology.org/jlcl-multiple-ingestion/2021.nodalida-main.22/) (Bråthen et al., NoDaLiDa 2021)
ACL