% Workshop paper on the GermaParl corpus (ParlaCLARIN III, held within LREC 2022).
% Record taken from the ACL Anthology page given in the url field.
@inproceedings{blaette-etal-2022-germaparl,
  title     = {How {GermaParl} Evolves: Improving Data Quality by Reproducible Corpus Preparation and User Involvement},
  author    = {Blaette, Andreas and
               Rakers, Julia and
               Leonhardt, Christoph},
  editor    = {Fi{\v{s}}er, Darja and
               Eskevich, Maria and
               Lenardi{\v{c}}, Jakob and
               de Jong, Franciska},
  booktitle = {Proceedings of the Workshop {ParlaCLARIN} {III} within the 13th Language Resources and Evaluation Conference},
  month     = jun,
  year      = {2022},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  url       = {https://aclanthology.org/2022.parlaclarin-1.2},
  pages     = {7--15},
  abstract  = {The development and curation of large-scale corpora of plenary debates requires not only care and attention to detail when the data is created but also effective means of sustainable quality control. This paper makes two contributions: Firstly, it presents an updated version of the GermaParl corpus of parliamentary debates in the German \emph{Bundestag}. Secondly, it shows how the corpus preparation pipeline is designed to serve the quality of the resource by facilitating effective community involvement. Centered around a workflow which combines reproducibility, transparency and version control, the pipeline allows for continuous improvements to the corpus.},
}
Markdown (Informal)
[How GermaParl Evolves: Improving Data Quality by Reproducible Corpus Preparation and User Involvement](https://aclanthology.org/2022.parlaclarin-1.2) (Blaette et al., ParlaCLARIN 2022)
ACL