@article{dalvi-mishra-etal-2017-domain,
title = "Domain-Targeted, High Precision Knowledge Extraction",
author = "Dalvi Mishra, Bhavana and
Tandon, Niket and
Clark, Peter",
journal = "Transactions of the Association for Computational Linguistics",
volume = "5",
year = "2017",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/Q17-1017",
doi = "10.1162/tacl_a_00058",
pages = "233--246",
abstract = "Our goal is to construct a domain-targeted, high precision knowledge base (KB), containing general (subject,predicate,object) statements about the world, in support of a downstream question-answering (QA) application. Despite recent advances in information extraction (IE) techniques, no suitable resource for our task already exists; existing resources are either too noisy, too named-entity centric, or too incomplete, and typically have not been constructed with a clear scope or purpose. To address these, we have created a domain-targeted, high precision knowledge extraction pipeline, leveraging Open IE, crowdsourcing, and a novel canonical schema learning algorithm (called CASI), that produces high precision knowledge targeted to a particular domain - in our case, elementary science. To measure the KB{'}s coverage of the target domain{'}s knowledge (its {``}comprehensiveness{''} with respect to science) we measure recall with respect to an independent corpus of domain text, and show that our pipeline produces output with over 80{\%} precision and 23{\%} recall with respect to that target, a substantially higher coverage of tuple-expressible science knowledge than other comparable resources. We have made the KB publicly available.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dalvi-mishra-etal-2017-domain">
<titleInfo>
<title>Domain-Targeted, High Precision Knowledge Extraction</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bhavana</namePart>
<namePart type="family">Dalvi Mishra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Niket</namePart>
<namePart type="family">Tandon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peter</namePart>
<namePart type="family">Clark</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre>journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Transactions of the Association for Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre>academic journal</genre>
</relatedItem>
<abstract>Our goal is to construct a domain-targeted, high precision knowledge base (KB), containing general (subject,predicate,object) statements about the world, in support of a downstream question-answering (QA) application. Despite recent advances in information extraction (IE) techniques, no suitable resource for our task already exists; existing resources are either too noisy, too named-entity centric, or too incomplete, and typically have not been constructed with a clear scope or purpose. To address these, we have created a domain-targeted, high precision knowledge extraction pipeline, leveraging Open IE, crowdsourcing, and a novel canonical schema learning algorithm (called CASI), that produces high precision knowledge targeted to a particular domain - in our case, elementary science. To measure the KB’s coverage of the target domain’s knowledge (its “comprehensiveness” with respect to science) we measure recall with respect to an independent corpus of domain text, and show that our pipeline produces output with over 80% precision and 23% recall with respect to that target, a substantially higher coverage of tuple-expressible science knowledge than other comparable resources. We have made the KB publicly available.</abstract>
<identifier type="citekey">dalvi-mishra-etal-2017-domain</identifier>
<identifier type="doi">10.1162/tacl_a_00058</identifier>
<location>
<url>https://aclanthology.org/Q17-1017</url>
</location>
<part>
<date>2017</date>
<detail type="volume"><number>5</number></detail>
<extent unit="page">
<start>233</start>
<end>246</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T Domain-Targeted, High Precision Knowledge Extraction
%A Dalvi Mishra, Bhavana
%A Tandon, Niket
%A Clark, Peter
%J Transactions of the Association for Computational Linguistics
%D 2017
%V 5
%I MIT Press
%C Cambridge, MA
%F dalvi-mishra-etal-2017-domain
%X Our goal is to construct a domain-targeted, high precision knowledge base (KB), containing general (subject,predicate,object) statements about the world, in support of a downstream question-answering (QA) application. Despite recent advances in information extraction (IE) techniques, no suitable resource for our task already exists; existing resources are either too noisy, too named-entity centric, or too incomplete, and typically have not been constructed with a clear scope or purpose. To address these, we have created a domain-targeted, high precision knowledge extraction pipeline, leveraging Open IE, crowdsourcing, and a novel canonical schema learning algorithm (called CASI), that produces high precision knowledge targeted to a particular domain - in our case, elementary science. To measure the KB’s coverage of the target domain’s knowledge (its “comprehensiveness” with respect to science) we measure recall with respect to an independent corpus of domain text, and show that our pipeline produces output with over 80% precision and 23% recall with respect to that target, a substantially higher coverage of tuple-expressible science knowledge than other comparable resources. We have made the KB publicly available.
%9 journal article
%R 10.1162/tacl_a_00058
%U https://aclanthology.org/Q17-1017
%U https://doi.org/10.1162/tacl_a_00058
%P 233-246
Markdown (Informal)
[Domain-Targeted, High Precision Knowledge Extraction](https://aclanthology.org/Q17-1017) (Dalvi Mishra et al., TACL 2017)
ACL