@comment{LREC-COLING 2024 paper, ACL Anthology ID 2024.lrec-main.59. The url field uses the canonical Anthology address rather than a preview-branch build.}
@inproceedings{haider-2024-large,
  title     = {A Large Annotated Reference Corpus of {New} {High} {German} Poetry},
  author    = {Haider, Thomas},
  editor    = {Calzolari, Nicoletta and
               Kan, Min-Yen and
               Hoste, Veronique and
               Lenci, Alessandro and
               Sakti, Sakriani and
               Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation ({LREC-COLING} 2024)},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  url       = {https://aclanthology.org/2024.lrec-main.59/},
  pages     = {677--683},
  abstract  = {This paper introduces a large annotated corpus of public domain German poetry, covering the time period from 1600 to the 1920s with 65k poems. We describe how the corpus was compiled, how it was cleaned (including duplicate detection), and how it looks now in terms of size, format, temporal distribution, and automatic annotation. Besides metadata, the corpus contains reliable annotation of tokens, syllables, part-of-speech, and meter and verse measure. Finally, we give some statistics on the annotation and an overview of other poetry corpora.},
}
Markdown (Informal)
[A Large Annotated Reference Corpus of New High German Poetry](https://aclanthology.org/2024.lrec-main.59/) (Haider, LREC-COLING 2024)
ACL