% Conference paper (LREC 2026); field data as exported by the ACL Anthology preview site.
@inproceedings{danilova-stymne-2026-dataset,
  title         = {A Dataset of Historical Medical Periodicals Annotated with Textual Genre},
  author        = {Danilova, Vera and
                   Stymne, Sara},
  editor        = {Piperidis, Stelios and
                   Bel, N{\'u}ria and
                   van den Heuvel, Henk and
                   Ide, Nancy and
                   Krek, Simon and
                   Toral, Antonio},
  booktitle     = {International Conference on Language Resources and Evaluation},
  month         = may,
  year          = {2026},
  address       = {Palma de Mallorca, Spain},
  publisher     = {ELRA Language Resource Association},
  url           = {https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.75/},
  pages         = {973--984},
  abstract      = {Historical corpora, especially those compiled from magazines and periodicals, are complex due to the diversity of text types and evolving genre conventions. Addressing these challenges requires systematic genre annotation and well-defined classification schemes to support downstream NLP tasks. This paper introduces a dataset of historical medical periodical texts in German and Swedish annotated for textual genre and additional features that may influence genre identification, such as the presence of OCR errors. We describe the development of the genre classification, annotator recruitment and training procedures, and provide an analysis of the annotator agreement.},
  internal-note = {The original export carried volume = main, i.e. the Anthology volume id seen in 2026.lrec-main.75 rather than a bound volume number; it is kept in this ignored field so styles do not typeset it.},
}
Markdown (Informal)
[A Dataset of Historical Medical Periodicals Annotated with Textual Genre](https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.75/) (Danilova & Stymne, LREC 2026)
ACL