% RANLP 2019 paper on OCR post-correction; ACL Anthology ID R19-1051.
% NOTE(review): "Paft" and "Fiiture" in the title look like deliberate
% OCR-style misspellings of "Past" and "Future" -- confirm before "correcting".
% The url field uses the canonical aclanthology.org address rather than a
% temporary preview deployment; the doi field is the stable identifier.
@inproceedings{hamalainen-hengchen-2019-paft,
    title     = {From the Paft to the Fiiture: a Fully Automatic {NMT} and Word Embeddings Method for {OCR} Post-Correction},
    author    = {H{\"a}m{\"a}l{\"a}inen, Mika and
                 Hengchen, Simon},
    editor    = {Mitkov, Ruslan and
                 Angelova, Galia},
    booktitle = {Proceedings of the International Conference on Recent Advances in Natural Language Processing ({RANLP} 2019)},
    month     = sep,
    year      = {2019},
    address   = {Varna, Bulgaria},
    publisher = {INCOMA Ltd.},
    url       = {https://aclanthology.org/R19-1051/},
    doi       = {10.26615/978-954-452-056-4_051},
    pages     = {431--436},
    abstract  = {A great deal of historical corpora suffer from errors introduced by the OCR (optical character recognition) methods used in the digitization process. Correcting these errors manually is a time-consuming process and a great part of the automatic approaches have been relying on rules or supervised machine learning. We present a fully automatic unsupervised way of extracting parallel data for training a character-based sequence-to-sequence NMT (neural machine translation) model to conduct OCR error correction.},
}
Markdown (Informal)
[From the Paft to the Fiiture: a Fully Automatic NMT and Word Embeddings Method for OCR Post-Correction](https://aclanthology.org/R19-1051/) (Hämäläinen & Hengchen, RANLP 2019)
ACL