% Conference paper in the RANLP 2019 proceedings (ACL Anthology ID R19-1051).
% The misspellings "Paft" and "Fiiture" in the title are intentional (they
% imitate OCR errors) and must not be corrected.
% The url field uses the canonical ACL Anthology address rather than a
% preview-branch link, which is not a stable location.
@inproceedings{hamalainen-hengchen-2019-paft,
  author    = {H{\"a}m{\"a}l{\"a}inen, Mika and
               Hengchen, Simon},
  title     = {From the Paft to the Fiiture: a Fully Automatic {NMT} and Word Embeddings Method for {OCR} Post-Correction},
  editor    = {Mitkov, Ruslan and
               Angelova, Galia},
  booktitle = {Proceedings of the International Conference on Recent Advances in Natural Language Processing ({RANLP} 2019)},
  month     = sep,
  year      = {2019},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd.},
  pages     = {431--436},
  doi       = {10.26615/978-954-452-056-4_051},
  url       = {https://aclanthology.org/R19-1051/},
  abstract  = {A great deal of historical corpora suffer from errors introduced by the OCR (optical character recognition) methods used in the digitization process. Correcting these errors manually is a time-consuming process and a great part of the automatic approaches have been relying on rules or supervised machine learning. We present a fully automatic unsupervised way of extracting parallel data for training a character-based sequence-to-sequence NMT (neural machine translation) model to conduct OCR error correction.},
}
Markdown (Informal)
[From the Paft to the Fiiture: a Fully Automatic NMT and Word Embeddings Method for OCR Post-Correction](https://aclanthology.org/R19-1051/) (Hämäläinen & Hengchen, RANLP 2019)
ACL