@comment{ACL Anthology record 2020.parlaclarin-1.6: workshop paper describing the siParl corpus. The url field uses the permanent aclanthology.org address rather than a preview build.}
@inproceedings{pancur-erjavec-2020-siparl,
  title     = {The {siParl} corpus of {Slovene} parliamentary proceedings},
  author    = {Pancur, Andrej and Erjavec, Toma{\v{z}}},
  editor    = {Fi{\v{s}}er, Darja and Eskevich, Maria and de Jong, Franciska},
  booktitle = {Proceedings of the Second {ParlaCLARIN} Workshop},
  month     = may,
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  url       = {https://aclanthology.org/2020.parlaclarin-1.6/},
  pages     = {28--34},
  language  = {eng},
  isbn      = {979-10-95546-47-4},
  abstract  = {The paper describes the process of acquisition, up-translation, encoding, annotation, and distribution of siParl, a collection of the parliamentary debates from the Assembly of the Republic of Slovenia from 1990{--}2018, covering the period from just before Slovenia became an independent country in 1991, and almost up to the present. The entire corpus, comprising over 8 thousand sessions, 1 million speeches and 200 million words was uniformly encoded in accordance with the TEI-based Parla-CLARIN schema for encoding corpora of parliamentary debates, and contains extensive meta-data about the speakers, a typology of sessions etc. and structural and editorial annotations. The corpus was also part-of-speech tagged and lemmatised using state-of-the-art tools. The corpus is maintained on GitHub with its major versions archived in the CLARIN.SI repository and is available for linguistic analysis in the scope of the on-line CLARIN.SI concordancers, thus offering an invaluable resource for scholars studying Slovenian political history.},
}
Markdown (Informal)
[The siParl corpus of Slovene parliamentary proceedings](https://aclanthology.org/2020.parlaclarin-1.6/) (Pancur & Erjavec, ParlaCLARIN 2020)
ACL