% Conference paper, ACL Anthology ID 2023.nlp4dh-1.20.
% The url field uses the canonical aclanthology.org address rather than a
% temporary preview-branch deployment, so the link stays resolvable.
@inproceedings{karaisl-2023-question,
  title     = {A Question of Confidence: Using {OCR} Technology for Script analysis},
  author    = {Karaisl, Antonia},
  editor    = {H{\"a}m{\"a}l{\"a}inen, Mika and
               {\"O}hman, Emily and
               Pirinen, Flammie and
               Alnajjar, Khalid and
               Miyagawa, So and
               Bizzoni, Yuri and
               Partanen, Niko and
               Rueter, Jack},
  booktitle = {Proceedings of the Joint 3rd International Conference on Natural Language Processing for Digital Humanities and 8th International Workshop on Computational Linguistics for Uralic Languages},
  month     = dec,
  year      = {2023},
  address   = {Tokyo, Japan},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.nlp4dh-1.20/},
  pages     = {162--171},
  abstract  = {The following article proposes a method employing the Tesseract OCR engine to aid palaeographic analysis and scribal identification. Repurposing the so-called confidence score provided by the OCR engine, different methods of visualization are used to surface differences between font families, script types and manuscript hands.},
}
Markdown (Informal)
[A Question of Confidence: Using OCR Technology for Script analysis](https://aclanthology.org/2023.nlp4dh-1.20/) (Karaisl, NLP4DH-IWCLUL 2023)
ACL
- Antonia Karaisl. 2023. A Question of Confidence: Using OCR Technology for Script analysis. In Proceedings of the Joint 3rd International Conference on Natural Language Processing for Digital Humanities and 8th International Workshop on Computational Linguistics for Uralic Languages, pages 162–171, Tokyo, Japan. Association for Computational Linguistics.