@comment{Makarov and Clematide (2020), paper in the Proceedings of the 58th Annual Meeting of the ACL. The url field is the stable ACL Anthology landing page for this paper; the doi field is stored bare, without a resolver prefix.}
@inproceedings{makarov-clematide-2020-semi,
  title     = {Semi-supervised Contextual Historical Text Normalization},
  author    = {Makarov, Peter and
               Clematide, Simon},
  editor    = {Jurafsky, Dan and
               Chai, Joyce and
               Schluter, Natalie and
               Tetreault, Joel},
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
  month     = jul,
  year      = {2020},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2020.acl-main.650/},
  doi       = {10.18653/v1/2020.acl-main.650},
  pages     = {7284--7295},
  abstract  = {Historical text normalization, the task of mapping historical word forms to their modern counterparts, has recently attracted a lot of interest (Bollmann, 2019; Tang et al., 2018; Lusetti et al., 2018; Bollmann et al., 2018; Robertson and Goldwater, 2018; Bollmann et al., 2017; Korchagina, 2017). Yet, virtually all approaches suffer from the two limitations: 1) They consider a fully supervised setup, often with impractically large manually normalized datasets; 2) Normalization happens on words in isolation. By utilizing a simple generative normalization model and obtaining powerful contextualization from the target-side language model, we train accurate models with unlabeled historical data. In realistic training scenarios, our approach often leads to reduction in manually normalized data at the same accuracy levels.},
}
Markdown (Informal)
[Semi-supervised Contextual Historical Text Normalization](https://aclanthology.org/2020.acl-main.650/) (Makarov & Clematide, ACL 2020)
ACL