@inproceedings{laur-etal-2020-estnltk,
title = "{E}st{NLTK} 1.6: Remastered {E}stonian {NLP} Pipeline",
author = {Laur, Sven and
Orasmaa, Siim and
S{\"a}rg, Dage and
Tammo, Paul},
booktitle = "Proceedings of the 12th Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.884",
pages = "7152--7160",
abstract = "The goal of the EstNLTK Python library is to provide a unified programming interface for natural language processing in Estonian. As such, previous versions of the library have been immensely successful both in academic and industrial circles. However, they also contained serious structural limitations {--} it was hard to add new components and there was a lack of fine-grained control needed for back-end programming. These issues have been explicitly addressed in the EstNLTK library while preserving the intuitive interface for novices. We have remastered the basic NLP pipeline by adding many data cleaning steps that are necessary for analyzing real-life texts, and state of the art components for morphological analysis and fact extraction. Our evaluation on unlabelled data shows that the remastered basic NLP pipeline outperforms both the previous version of the toolkit, as well as neural models of StanfordNLP. In addition, EstNLTK contains a new interface for storing, processing and querying text objects in Postgres database which greatly simplifies processing of large text collections. EstNLTK is freely available under the GNU GPL version 2 license, which is standard for academic software.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="laur-etal-2020-estnltk">
<titleInfo>
<title>EstNLTK 1.6: Remastered Estonian NLP Pipeline</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sven</namePart>
<namePart type="family">Laur</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Siim</namePart>
<namePart type="family">Orasmaa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dage</namePart>
<namePart type="family">Särg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Tammo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 12th Language Resources and Evaluation Conference</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>The goal of the EstNLTK Python library is to provide a unified programming interface for natural language processing in Estonian. As such, previous versions of the library have been immensely successful both in academic and industrial circles. However, they also contained serious structural limitations – it was hard to add new components and there was a lack of fine-grained control needed for back-end programming. These issues have been explicitly addressed in the EstNLTK library while preserving the intuitive interface for novices. We have remastered the basic NLP pipeline by adding many data cleaning steps that are necessary for analyzing real-life texts, and state of the art components for morphological analysis and fact extraction. Our evaluation on unlabelled data shows that the remastered basic NLP pipeline outperforms both the previous version of the toolkit, as well as neural models of StanfordNLP. In addition, EstNLTK contains a new interface for storing, processing and querying text objects in Postgres database which greatly simplifies processing of large text collections. EstNLTK is freely available under the GNU GPL version 2 license, which is standard for academic software.</abstract>
<identifier type="citekey">laur-etal-2020-estnltk</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.884</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>7152</start>
<end>7160</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T EstNLTK 1.6: Remastered Estonian NLP Pipeline
%A Laur, Sven
%A Orasmaa, Siim
%A Särg, Dage
%A Tammo, Paul
%S Proceedings of the 12th Language Resources and Evaluation Conference
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F laur-etal-2020-estnltk
%X The goal of the EstNLTK Python library is to provide a unified programming interface for natural language processing in Estonian. As such, previous versions of the library have been immensely successful both in academic and industrial circles. However, they also contained serious structural limitations – it was hard to add new components and there was a lack of fine-grained control needed for back-end programming. These issues have been explicitly addressed in the EstNLTK library while preserving the intuitive interface for novices. We have remastered the basic NLP pipeline by adding many data cleaning steps that are necessary for analyzing real-life texts, and state of the art components for morphological analysis and fact extraction. Our evaluation on unlabelled data shows that the remastered basic NLP pipeline outperforms both the previous version of the toolkit, as well as neural models of StanfordNLP. In addition, EstNLTK contains a new interface for storing, processing and querying text objects in Postgres database which greatly simplifies processing of large text collections. EstNLTK is freely available under the GNU GPL version 2 license, which is standard for academic software.
%U https://aclanthology.org/2020.lrec-1.884
%P 7152-7160
Markdown (Informal)
[EstNLTK 1.6: Remastered Estonian NLP Pipeline](https://aclanthology.org/2020.lrec-1.884) (Laur et al., LREC 2020)
ACL
- Sven Laur, Siim Orasmaa, Dage Särg, and Paul Tammo. 2020. EstNLTK 1.6: Remastered Estonian NLP Pipeline. In Proceedings of the 12th Language Resources and Evaluation Conference, pages 7152–7160, Marseille, France. European Language Resources Association.