@inproceedings{bordea-etal-2020-evaluation,
title = "Evaluation Dataset and Methodology for Extracting Application-Specific Taxonomies from the {W}ikipedia Knowledge Graph",
author = "Bordea, Georgeta and
Faralli, Stefano and
Mougin, Fleur and
Buitelaar, Paul and
Diallo, Gayo",
booktitle = "Proceedings of the 12th Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.285",
pages = "2341--2347",
abstract = "In this work, we address the task of extracting application-specific taxonomies from the category hierarchy of Wikipedia. Previous work on pruning the Wikipedia knowledge graph relied on silver standard taxonomies which can only be automatically extracted for a small subset of domains rooted in relatively focused nodes, placed at an intermediate level in the knowledge graphs. In this work, we propose an iterative methodology to extract an application-specific gold standard dataset from a knowledge graph and an evaluation framework to comparatively assess the quality of noisy automatically extracted taxonomies. We employ an existing state of the art algorithm in an iterative manner and we propose several sampling strategies to reduce the amount of manual work needed for evaluation. A first gold standard dataset is released to the research community for this task along with a companion evaluation framework. This dataset addresses a real-world application from the medical domain, namely the extraction of food-drug and herb-drug interactions.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bordea-etal-2020-evaluation">
<titleInfo>
<title>Evaluation Dataset and Methodology for Extracting Application-Specific Taxonomies from the Wikipedia Knowledge Graph</title>
</titleInfo>
<name type="personal">
<namePart type="given">Georgeta</namePart>
<namePart type="family">Bordea</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stefano</namePart>
<namePart type="family">Faralli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fleur</namePart>
<namePart type="family">Mougin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Buitelaar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gayo</namePart>
<namePart type="family">Diallo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-may</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 12th Language Resources and Evaluation Conference</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>In this work, we address the task of extracting application-specific taxonomies from the category hierarchy of Wikipedia. Previous work on pruning the Wikipedia knowledge graph relied on silver standard taxonomies which can only be automatically extracted for a small subset of domains rooted in relatively focused nodes, placed at an intermediate level in the knowledge graphs. In this work, we propose an iterative methodology to extract an application-specific gold standard dataset from a knowledge graph and an evaluation framework to comparatively assess the quality of noisy automatically extracted taxonomies. We employ an existing state of the art algorithm in an iterative manner and we propose several sampling strategies to reduce the amount of manual work needed for evaluation. A first gold standard dataset is released to the research community for this task along with a companion evaluation framework. This dataset addresses a real-world application from the medical domain, namely the extraction of food-drug and herb-drug interactions.</abstract>
<identifier type="citekey">bordea-etal-2020-evaluation</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.285</url>
</location>
<part>
<date>2020-may</date>
<extent unit="page">
<start>2341</start>
<end>2347</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluation Dataset and Methodology for Extracting Application-Specific Taxonomies from the Wikipedia Knowledge Graph
%A Bordea, Georgeta
%A Faralli, Stefano
%A Mougin, Fleur
%A Buitelaar, Paul
%A Diallo, Gayo
%S Proceedings of the 12th Language Resources and Evaluation Conference
%D 2020
%8 may
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F bordea-etal-2020-evaluation
%X In this work, we address the task of extracting application-specific taxonomies from the category hierarchy of Wikipedia. Previous work on pruning the Wikipedia knowledge graph relied on silver standard taxonomies which can only be automatically extracted for a small subset of domains rooted in relatively focused nodes, placed at an intermediate level in the knowledge graphs. In this work, we propose an iterative methodology to extract an application-specific gold standard dataset from a knowledge graph and an evaluation framework to comparatively assess the quality of noisy automatically extracted taxonomies. We employ an existing state of the art algorithm in an iterative manner and we propose several sampling strategies to reduce the amount of manual work needed for evaluation. A first gold standard dataset is released to the research community for this task along with a companion evaluation framework. This dataset addresses a real-world application from the medical domain, namely the extraction of food-drug and herb-drug interactions.
%U https://aclanthology.org/2020.lrec-1.285
%P 2341-2347
Markdown (Informal)
[Evaluation Dataset and Methodology for Extracting Application-Specific Taxonomies from the Wikipedia Knowledge Graph](https://aclanthology.org/2020.lrec-1.285) (Bordea et al., LREC 2020)
ACL