% Conference paper from LREC 2026 (ACL Anthology ID 2026.lrec-main.120), so it is
% recorded as an inproceedings entry with the conference name in booktitle.
% NOTE(review): the export carried volume "main", which appears to be the Anthology
% track label rather than a volume number; it is omitted so styles do not print it.
@inproceedings{janssen-etal-2026-semi,
    title     = "From Semi-Digital Edition to Historical {NLP} Resource: Constructing and Annotating Historical Multilingual Parallel Text Collections on the {TEITOK} Platform",
    author    = "Janssen, Maarten and
                 Jouravel, Anna and
                 Lendvai, Piroska",
    editor    = "Piperidis, Stelios and
                 Bel, N{\'u}ria and
                 van den Heuvel, Henk and
                 Ide, Nancy and
                 Krek, Simon and
                 Toral, Antonio",
    booktitle = "International Conference on Language Resources and Evaluation",
    month     = may,
    year      = "2026",
    address   = "Palma de Mallorca, Spain",
    publisher = "ELRA Language Resource Association",
    url       = "https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.120/",
    pages     = "1553--1561",
    abstract  = "We construct a multilingual, parallelized digital collection comprising a reconstructed Old Greek text from the 4th century CE and its seven historical versions, modern editions, and translations. We describe the workflow and integrated tools on the TEITOK web-based platform for ingesting, aligning, parallelizing and morphosyntactically annotating these materials. Textual alignment is performed on both the sentence and word level, after which the data are annotated with dependency parses in the Universal Dependencies paradigm. The newly created and manually post-corrected collection can be explored via advanced parallel search functionalities and flexible visualization modes. This workflow is meant to provide support for digital humanities and historical NLP projects via transforming the input texts into parallel NLP resources, enabling cross-fertilization and new insights by multiple research communities."
}
Markdown (Informal)
[From Semi-Digital Edition to Historical NLP Resource: Constructing and Annotating Historical Multilingual Parallel Text Collections on the TEITOK Platform](https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.120/) (Janssen et al., LREC 2026)
ACL