% RANLP 2019 paper on pseudonymizing privacy-sensitive entities in a German email corpus.
% The url field uses the canonical ACL Anthology address for paper ID R19-1030.
@inproceedings{eder-etal-2019-de,
    title     = {De-Identification of Emails: Pseudonymizing Privacy-Sensitive Data in a {German} Email Corpus},
    author    = {Eder, Elisabeth and
                 Krieg-Holz, Ulrike and
                 Hahn, Udo},
    editor    = {Mitkov, Ruslan and
                 Angelova, Galia},
    booktitle = {Proceedings of the International Conference on Recent Advances in Natural Language Processing ({RANLP} 2019)},
    month     = sep,
    year      = {2019},
    address   = {Varna, Bulgaria},
    publisher = {INCOMA Ltd.},
    url       = {https://aclanthology.org/R19-1030/},
    doi       = {10.26615/978-954-452-056-4_030},
    pages     = {259--269},
    abstract  = {We deal with the pseudonymization of those stretches of text in emails that might allow to identify real individual persons. This task is decomposed into two steps. First, named entities carrying privacy-sensitive information (e.g., names of persons, locations, phone numbers or dates) are identified, and, second, these privacy-bearing entities are replaced by synthetically generated surrogates (e.g., a person originally named `John Doe' is renamed as `Bill Powers'). We describe a system architecture for surrogate generation and evaluate our approach on CodeAlltag, a German email corpus.},
}
Markdown (Informal)
[De-Identification of Emails: Pseudonymizing Privacy-Sensitive Data in a German Email Corpus](https://aclanthology.org/R19-1030/) (Eder et al., RANLP 2019)
ACL