% Workshop paper in the CALD-pseudo 2024 proceedings; the url field points to the ACL Anthology record.
% Title braces protect the acronym BERT and the proper noun Markov from style-driven lowercasing.
@inproceedings{simancek-vydiswaran-2024-handling,
  title     = {Handling Name Errors of a {BERT}-Based De-Identification System: Insights from Stratified Sampling and {Markov}-based Pseudonymization},
  author    = {Simancek, Dalton and
               Vydiswaran, VG Vinod},
  editor    = {Volodina, Elena and
               Alfter, David and
               Dobnik, Simon and
               Lindstr{\"o}m Tiedemann, Therese and
               Mu{\~n}oz S{\'a}nchez, Ricardo and
               Szawerna, Maria Irena and
               Vu, Xuan-Son},
  booktitle = {Proceedings of the Workshop on Computational Approaches to Language Data Pseudonymization (CALD-pseudo 2024)},
  month     = mar,
  year      = {2024},
  address   = {St. Julian{'}s, Malta},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.caldpseudo-1.1},
  pages     = {1--7},
  abstract  = {Missed recognition of named entities while de-identifying clinical narratives poses a critical challenge in protecting patient-sensitive health information. Mitigating name recognition errors is essential to minimize risk of patient re-identification. In this paper, we emphasize the need for stratified sampling and enhanced contextual considerations concerning Name Tokens using a fine-tuned Longformer BERT model for clinical text de-identification. We introduce a Hidden in Plain Sight (HIPS) Markov-based replacement technique for names to mask name recognition misses, revealing a significant reduction in name leakage rates. Our experimental results underscore the impact on addressing name recognition challenges in BERT-based de-identification systems for heightened privacy protection in electronic health records.},
}
Markdown (Informal)
[Handling Name Errors of a BERT-Based De-Identification System: Insights from Stratified Sampling and Markov-based Pseudonymization](https://aclanthology.org/2024.caldpseudo-1.1) (Simancek & Vydiswaran, CALD-pseudo-WS 2024)
ACL