% BibTeX record for Bick (2016), LREC'16 proceedings paper L16-1171.
% Field values are brace-delimited; "may" is the standard BibTeX month macro.
@inproceedings{bick-2016-morphological,
    title     = {A Morphological Lexicon of {E}speranto with Morpheme Frequencies},
    author    = {Bick, Eckhard},
    booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)},
    month     = may,
    year      = {2016},
    address   = {Portoro{\v{z}}, Slovenia},
    publisher = {European Language Resources Association (ELRA)},
    url       = {https://aclanthology.org/L16-1171},
    pages     = {1075--1078},
    abstract  = {This paper discusses the internal structure of complex Esperanto words (CWs). Using a morphological analyzer, possible affixation and compounding is checked for over 50,000 Esperanto lexemes against a list of 17,000 root words. Morpheme boundaries in the resulting analyses were then checked manually, creating a CW dictionary of 28,000 words, representing 56.4{\%} of the lexicon, or 19.4{\%} of corpus tokens. The error percentage of the EspGram morphological analyzer for new corpus CWs was 4.3{\%} for types and 6.4{\%} for tokens, with a recall of almost 100{\%}, and wrong/spurious boundaries being more common than missing ones. For pedagogical purposes a morpheme frequency dictionary was constructed for a 16 million word corpus, confirming the importance of agglutinative derivational morphemes in the Esperanto lexicon. Finally, as a means to reduce the morphological ambiguity of CWs, we provide POS likelihoods for Esperanto suffixes.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the conference paper with citekey bick-2016-morphological. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="bick-2016-morphological">
    <titleInfo>
      <title>A Morphological Lexicon of Esperanto with Morpheme Frequencies</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Eckhard</namePart>
      <namePart type="family">Bick</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <!-- Year and month in W3CDTF form (YYYY-MM) so date-aware tools can parse and sort it. -->
      <dateIssued encoding="w3cdtf">2016-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume this paper appears in. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)</title>
      </titleInfo>
      <originInfo>
        <publisher>European Language Resources Association (ELRA)</publisher>
        <place>
          <placeTerm type="text">Portorož, Slovenia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <!-- Abstract text is kept on one line: whitespace inside text content is data. -->
    <abstract>This paper discusses the internal structure of complex Esperanto words (CWs). Using a morphological analyzer, possible affixation and compounding is checked for over 50,000 Esperanto lexemes against a list of 17,000 root words. Morpheme boundaries in the resulting analyses were then checked manually, creating a CW dictionary of 28,000 words, representing 56.4% of the lexicon, or 19.4% of corpus tokens. The error percentage of the EspGram morphological analyzer for new corpus CWs was 4.3% for types and 6.4% for tokens, with a recall of almost 100%, and wrong/spurious boundaries being more common than missing ones. For pedagogical purposes a morpheme frequency dictionary was constructed for a 16 million word corpus, confirming the importance of agglutinative derivational morphemes in the Esperanto lexicon. Finally, as a means to reduce the morphological ambiguity of CWs, we provide POS likelihoods for Esperanto suffixes.</abstract>
    <identifier type="citekey">bick-2016-morphological</identifier>
    <location>
      <url>https://aclanthology.org/L16-1171</url>
    </location>
    <part>
      <!-- Same issue date as originInfo/dateIssued, in the same W3CDTF form. -->
      <date encoding="w3cdtf">2016-05</date>
      <extent unit="page">
        <start>1075</start>
        <end>1078</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T A Morphological Lexicon of Esperanto with Morpheme Frequencies
%A Bick, Eckhard
%S Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)
%D 2016
%8 May
%I European Language Resources Association (ELRA)
%C Portorož, Slovenia
%F bick-2016-morphological
%X This paper discusses the internal structure of complex Esperanto words (CWs). Using a morphological analyzer, possible affixation and compounding is checked for over 50,000 Esperanto lexemes against a list of 17,000 root words. Morpheme boundaries in the resulting analyses were then checked manually, creating a CW dictionary of 28,000 words, representing 56.4% of the lexicon, or 19.4% of corpus tokens. The error percentage of the EspGram morphological analyzer for new corpus CWs was 4.3% for types and 6.4% for tokens, with a recall of almost 100%, and wrong/spurious boundaries being more common than missing ones. For pedagogical purposes a morpheme frequency dictionary was constructed for a 16 million word corpus, confirming the importance of agglutinative derivational morphemes in the Esperanto lexicon. Finally, as a means to reduce the morphological ambiguity of CWs, we provide POS likelihoods for Esperanto suffixes.
%U https://aclanthology.org/L16-1171
%P 1075-1078
Markdown (Informal)
[A Morphological Lexicon of Esperanto with Morpheme Frequencies](https://aclanthology.org/L16-1171) (Bick, LREC 2016)
ACL