@inproceedings{consoli-etal-2020-embeddings,
title = "Embeddings for Named Entity Recognition in Geoscience {P}ortuguese Literature",
author = "Consoli, Bernardo and
Santos, Joaquim and
Gomes, Diogo and
Cordeiro, Fabio and
Vieira, Renata and
Moreira, Viviane",
booktitle = "Proceedings of the 12th Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.568",
pages = "4625--4630",
abstract = "This work focuses on Portuguese Named Entity Recognition (NER) in the Geology domain. The only domain-specific dataset in the Portuguese language annotated for NER is the GeoCorpus. Our approach relies on BiLSTM-CRF neural networks (a widely used type of network for this area of research) that use vector and tensor embedding representations. Three types of embedding models were used (Word Embeddings, Flair Embeddings, and Stacked Embeddings) under two versions (domain-specific and generalized). The domain specific Flair Embeddings model was originally trained with a generalized context in mind, but was then fine-tuned with domain-specific Oil and Gas corpora, as there simply was not enough domain corpora to properly train such a model. Each of these embeddings was evaluated separately, as well as stacked with another embedding. Finally, we achieved state-of-the-art results for this domain with one of our embeddings, and we performed an error analysis on the language model that achieved the best results. Furthermore, we investigated the effects of domain-specific versus generalized embeddings.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="consoli-etal-2020-embeddings">
<titleInfo>
<title>Embeddings for Named Entity Recognition in Geoscience Portuguese Literature</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bernardo</namePart>
<namePart type="family">Consoli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joaquim</namePart>
<namePart type="family">Santos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Diogo</namePart>
<namePart type="family">Gomes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fabio</namePart>
<namePart type="family">Cordeiro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Renata</namePart>
<namePart type="family">Vieira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-may</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 12th Language Resources and Evaluation Conference</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>This work focuses on Portuguese Named Entity Recognition (NER) in the Geology domain. The only domain-specific dataset in the Portuguese language annotated for NER is the GeoCorpus. Our approach relies on BiLSTM-CRF neural networks (a widely used type of network for this area of research) that use vector and tensor embedding representations. Three types of embedding models were used (Word Embeddings, Flair Embeddings, and Stacked Embeddings) under two versions (domain-specific and generalized). The domain specific Flair Embeddings model was originally trained with a generalized context in mind, but was then fine-tuned with domain-specific Oil and Gas corpora, as there simply was not enough domain corpora to properly train such a model. Each of these embeddings was evaluated separately, as well as stacked with another embedding. Finally, we achieved state-of-the-art results for this domain with one of our embeddings, and we performed an error analysis on the language model that achieved the best results. Furthermore, we investigated the effects of domain-specific versus generalized embeddings.</abstract>
<identifier type="citekey">consoli-etal-2020-embeddings</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.568</url>
</location>
<part>
<date>2020-may</date>
<extent unit="page">
<start>4625</start>
<end>4630</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Embeddings for Named Entity Recognition in Geoscience Portuguese Literature
%A Consoli, Bernardo
%A Santos, Joaquim
%A Gomes, Diogo
%A Cordeiro, Fabio
%A Vieira, Renata
%A Moreira, Viviane
%S Proceedings of the 12th Language Resources and Evaluation Conference
%D 2020
%8 may
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F consoli-etal-2020-embeddings
%X This work focuses on Portuguese Named Entity Recognition (NER) in the Geology domain. The only domain-specific dataset in the Portuguese language annotated for NER is the GeoCorpus. Our approach relies on BiLSTM-CRF neural networks (a widely used type of network for this area of research) that use vector and tensor embedding representations. Three types of embedding models were used (Word Embeddings, Flair Embeddings, and Stacked Embeddings) under two versions (domain-specific and generalized). The domain specific Flair Embeddings model was originally trained with a generalized context in mind, but was then fine-tuned with domain-specific Oil and Gas corpora, as there simply was not enough domain corpora to properly train such a model. Each of these embeddings was evaluated separately, as well as stacked with another embedding. Finally, we achieved state-of-the-art results for this domain with one of our embeddings, and we performed an error analysis on the language model that achieved the best results. Furthermore, we investigated the effects of domain-specific versus generalized embeddings.
%U https://aclanthology.org/2020.lrec-1.568
%P 4625-4630
Markdown (Informal)
[Embeddings for Named Entity Recognition in Geoscience Portuguese Literature](https://aclanthology.org/2020.lrec-1.568) (Consoli et al., LREC 2020)
ACL