@inproceedings{ljubesic-etal-2016-new,
title = "New Inflectional Lexicons and Training Corpora for Improved Morphosyntactic Annotation of {C}roatian and {S}erbian",
author = "Ljube{\v{s}}i{\'c}, Nikola and
Klubi{\v{c}}ka, Filip and
Agi{\'c}, {\v{Z}}eljko and
Jazbec, Ivo-Pavao",
booktitle = "Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)",
month = may,
year = "2016",
address = "Portoro{\v{z}}, Slovenia",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/L16-1676",
pages = "4264--4270",
abstract = "In this paper we present newly developed inflectional lexcions and manually annotated corpora of Croatian and Serbian. We introduce hrLex and srLex - two freely available inflectional lexicons of Croatian and Serbian - and describe the process of building these lexicons, supported by supervised machine learning techniques for lemma and paradigm prediction. Furthermore, we introduce hr500k, a manually annotated corpus of Croatian, 500 thousand tokens in size. We showcase the three newly developed resources on the task of morphosyntactic annotation of both languages by using a recently developed CRF tagger. We achieve best results yet reported on the task for both languages, beating the HunPos baseline trained on the same datasets by a wide margin.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ljubesic-etal-2016-new">
<titleInfo>
<title>New Inflectional Lexicons and Training Corpora for Improved Morphosyntactic Annotation of Croatian and Serbian</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nikola</namePart>
<namePart type="family">Ljubešić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Filip</namePart>
<namePart type="family">Klubička</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Željko</namePart>
<namePart type="family">Agić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivo-Pavao</namePart>
<namePart type="family">Jazbec</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-may</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Portorož, Slovenia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper we present newly developed inflectional lexcions and manually annotated corpora of Croatian and Serbian. We introduce hrLex and srLex - two freely available inflectional lexicons of Croatian and Serbian - and describe the process of building these lexicons, supported by supervised machine learning techniques for lemma and paradigm prediction. Furthermore, we introduce hr500k, a manually annotated corpus of Croatian, 500 thousand tokens in size. We showcase the three newly developed resources on the task of morphosyntactic annotation of both languages by using a recently developed CRF tagger. We achieve best results yet reported on the task for both languages, beating the HunPos baseline trained on the same datasets by a wide margin.</abstract>
<identifier type="citekey">ljubesic-etal-2016-new</identifier>
<location>
<url>https://aclanthology.org/L16-1676</url>
</location>
<part>
<date>2016-may</date>
<extent unit="page">
<start>4264</start>
<end>4270</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T New Inflectional Lexicons and Training Corpora for Improved Morphosyntactic Annotation of Croatian and Serbian
%A Ljubešić, Nikola
%A Klubička, Filip
%A Agić, Željko
%A Jazbec, Ivo-Pavao
%S Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)
%D 2016
%8 may
%I European Language Resources Association (ELRA)
%C Portorož, Slovenia
%F ljubesic-etal-2016-new
%X In this paper we present newly developed inflectional lexcions and manually annotated corpora of Croatian and Serbian. We introduce hrLex and srLex - two freely available inflectional lexicons of Croatian and Serbian - and describe the process of building these lexicons, supported by supervised machine learning techniques for lemma and paradigm prediction. Furthermore, we introduce hr500k, a manually annotated corpus of Croatian, 500 thousand tokens in size. We showcase the three newly developed resources on the task of morphosyntactic annotation of both languages by using a recently developed CRF tagger. We achieve best results yet reported on the task for both languages, beating the HunPos baseline trained on the same datasets by a wide margin.
%U https://aclanthology.org/L16-1676
%P 4264-4270
Markdown (Informal)
[New Inflectional Lexicons and Training Corpora for Improved Morphosyntactic Annotation of Croatian and Serbian](https://aclanthology.org/L16-1676) (Ljubešić et al., LREC 2016)
ACL