@inproceedings{erjavec-krek-2008-jos,
title = "The {JOS} Morphosyntactically Tagged Corpus of {S}lovene",
author = "Erjavec, Toma{\v{z}} and
Krek, Simon",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Tapias, Daniel",
booktitle = "Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)",
month = may,
year = "2008",
address = "Marrakech, Morocco",
publisher = "European Language Resources Association (ELRA)",
url = "https://preview.aclanthology.org/fix-sig-urls/L08-1451/",
abstract = "The JOSmorphosyntactic resources for Slovene consist of the specifications, lexicon, and two corpora: jos100k, a 100,000 word balanced monolingual sampled corpus annotated with hand validated morphosyntactic descriptions (MSDs) and lemmas, and jos1M, the 1 million-word partially hand validated corpus. The two corpora have been sampled from the 600M-word Slovene reference corpus FidaPLUS. The JOS resources have a standardised encoding, with the MULTEXT-East-type morphosyntactic specifications and the corpora encoded according to the Text Encoding Initiative Guidelines P5. JOS resources are available as a dataset for research under the Creative Commons licence and are meant to facilitate developments of HLT for Slovene."
}
Markdown (Informal)
[The JOS Morphosyntactically Tagged Corpus of Slovene](https://preview.aclanthology.org/fix-sig-urls/L08-1451/) (Erjavec & Krek, LREC 2008)
ACL