@inproceedings{loffler-etal-2020-tag,
title = "Tag Me If You Can! Semantic Annotation of Biodiversity Metadata with the {QEMP} Corpus and the {B}iodiv{T}agger",
author = {L{\"o}ffler, Felicitas and
Abdelmageed, Nora and
Babalou, Samira and
Kaur, Pawandeep and
K{\"o}nig-Ries, Birgitta},
booktitle = "Proceedings of the 12th Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.560",
pages = "4557--4564",
abstract = "Dataset Retrieval is gaining importance due to a large amount of research data and the great demand for reusing scientific data. Dataset Retrieval is mostly based on metadata, structured information about the primary data. Enriching these metadata with semantic annotations based on Linked Open Data (LOD) enables datasets, publications and authors to be connected and expands the search on semantically related terms. In this work, we introduce the BiodivTagger, an ontology-based Information Extraction pipeline, developed for metadata from biodiversity research. The system recognizes biological, physical and chemical processes, environmental terms, data parameters and phenotypes as well as materials and chemical compounds and links them to concepts in dedicated ontologies. To evaluate our pipeline, we created a gold standard of 50 metadata files (QEMP corpus) selected from five different data repositories in biodiversity research. To the best of our knowledge, this is the first annotated metadata corpus for biodiversity research data. The results reveal a mixed picture. While materials and data parameters are properly matched to ontological concepts in most cases, some ontological issues occurred for processes and environmental terms.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="loffler-etal-2020-tag">
<titleInfo>
<title>Tag Me If You Can! Semantic Annotation of Biodiversity Metadata with the QEMP Corpus and the BiodivTagger</title>
</titleInfo>
<name type="personal">
<namePart type="given">Felicitas</namePart>
<namePart type="family">Löffler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nora</namePart>
<namePart type="family">Abdelmageed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samira</namePart>
<namePart type="family">Babalou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pawandeep</namePart>
<namePart type="family">Kaur</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Birgitta</namePart>
<namePart type="family">König-Ries</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-may</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 12th Language Resources and Evaluation Conference</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>Dataset Retrieval is gaining importance due to a large amount of research data and the great demand for reusing scientific data. Dataset Retrieval is mostly based on metadata, structured information about the primary data. Enriching these metadata with semantic annotations based on Linked Open Data (LOD) enables datasets, publications and authors to be connected and expands the search on semantically related terms. In this work, we introduce the BiodivTagger, an ontology-based Information Extraction pipeline, developed for metadata from biodiversity research. The system recognizes biological, physical and chemical processes, environmental terms, data parameters and phenotypes as well as materials and chemical compounds and links them to concepts in dedicated ontologies. To evaluate our pipeline, we created a gold standard of 50 metadata files (QEMP corpus) selected from five different data repositories in biodiversity research. To the best of our knowledge, this is the first annotated metadata corpus for biodiversity research data. The results reveal a mixed picture. While materials and data parameters are properly matched to ontological concepts in most cases, some ontological issues occurred for processes and environmental terms.</abstract>
<identifier type="citekey">loffler-etal-2020-tag</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.560</url>
</location>
<part>
<date>2020-may</date>
<extent unit="page">
<start>4557</start>
<end>4564</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Tag Me If You Can! Semantic Annotation of Biodiversity Metadata with the QEMP Corpus and the BiodivTagger
%A Löffler, Felicitas
%A Abdelmageed, Nora
%A Babalou, Samira
%A Kaur, Pawandeep
%A König-Ries, Birgitta
%S Proceedings of the 12th Language Resources and Evaluation Conference
%D 2020
%8 may
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F loffler-etal-2020-tag
%X Dataset Retrieval is gaining importance due to a large amount of research data and the great demand for reusing scientific data. Dataset Retrieval is mostly based on metadata, structured information about the primary data. Enriching these metadata with semantic annotations based on Linked Open Data (LOD) enables datasets, publications and authors to be connected and expands the search on semantically related terms. In this work, we introduce the BiodivTagger, an ontology-based Information Extraction pipeline, developed for metadata from biodiversity research. The system recognizes biological, physical and chemical processes, environmental terms, data parameters and phenotypes as well as materials and chemical compounds and links them to concepts in dedicated ontologies. To evaluate our pipeline, we created a gold standard of 50 metadata files (QEMP corpus) selected from five different data repositories in biodiversity research. To the best of our knowledge, this is the first annotated metadata corpus for biodiversity research data. The results reveal a mixed picture. While materials and data parameters are properly matched to ontological concepts in most cases, some ontological issues occurred for processes and environmental terms.
%U https://aclanthology.org/2020.lrec-1.560
%P 4557-4564
Markdown (Informal)
[Tag Me If You Can! Semantic Annotation of Biodiversity Metadata with the QEMP Corpus and the BiodivTagger](https://aclanthology.org/2020.lrec-1.560) (Löffler et al., LREC 2020)
ACL