% LREC 2014 paper presenting TALC-sef, a parallel Serbian/English/French
% literary corpus with a manually-revised POS-tagged Serbian sub-corpus.
% Braced words in title/booktitle are protected from style-driven recasing.
% The url is the canonical ACL Anthology landing page for paper L14-1591.
@inproceedings{balvet-etal-2014-talc,
  title     = {{TALC-sef} A Manually-Revised {POS-TAgged} Literary Corpus in {Serbian}, {English} and {French}},
  author    = {Balvet, Antonio and
               Stosic, Dejan and
               Miletic, Aleksandra},
  editor    = {Calzolari, Nicoletta and
               Choukri, Khalid and
               Declerck, Thierry and
               Loftsson, Hrafn and
               Maegaard, Bente and
               Mariani, Joseph and
               Moreno, Asuncion and
               Odijk, Jan and
               Piperidis, Stelios},
  booktitle = {Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)},
  month     = may,
  year      = {2014},
  address   = {Reykjavik, Iceland},
  publisher = {European Language Resources Association ({ELRA})},
  url       = {https://aclanthology.org/L14-1591/},
  pages     = {4105--4110},
  abstract  = {In this paper, we present a parallel literary corpus for Serbian, English and French, the TALC-sef corpus. The corpus includes a manually-revised pos-tagged reference Serbian corpus of over 150,000 words. The initial objective was to devise a reference parallel corpus in the three languages, both for literary and linguistic studies. The French and English sub-corpora had been pos-tagged from the onset, using TreeTagger (Schmid, 1994), but the corpus lacked, until now, a tagged version of the Serbian sub-corpus. Here, we present the original parallel literary corpus, then we address issues related to pos-tagging a large collection of Serbian text: from the conception of an appropriate tagset for Serbian, to the choice of an automatic pos-tagger adapted to the task, and then to some quantitative and qualitative results. We then move on to a discussion of perspectives in the near future for further annotations of the whole parallel corpus.},
}
Markdown (Informal)
[TALC-sef A Manually-Revised POS-TAgged Literary Corpus in Serbian, English and French](https://aclanthology.org/L14-1591/) (Balvet et al., LREC 2014)
ACL