% TACL vol. 5 (2017) journal article; ACL Anthology ID Q17-1017.
@article{dalvi-mishra-etal-2017-domain,
  author    = {Dalvi Mishra, Bhavana and
               Tandon, Niket and
               Clark, Peter},
  title     = {Domain-Targeted, High Precision Knowledge Extraction},
  editor    = {Lee, Lillian and
               Johnson, Mark and
               Toutanova, Kristina},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {5},
  pages     = {233--246},
  year      = {2017},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  doi       = {10.1162/tacl_a_00058},
  url       = {https://aclanthology.org/Q17-1017/},
  abstract  = {Our goal is to construct a domain-targeted, high precision knowledge base (KB), containing general (subject,predicate,object) statements about the world, in support of a downstream question-answering (QA) application. Despite recent advances in information extraction (IE) techniques, no suitable resource for our task already exists; existing resources are either too noisy, too named-entity centric, or too incomplete, and typically have not been constructed with a clear scope or purpose. To address these, we have created a domain-targeted, high precision knowledge extraction pipeline, leveraging Open IE, crowdsourcing, and a novel canonical schema learning algorithm (called CASI), that produces high precision knowledge targeted to a particular domain - in our case, elementary science. To measure the KB's coverage of the target domain's knowledge (its {\textquotedblleft}comprehensiveness{\textquotedblright} with respect to science) we measure recall with respect to an independent corpus of domain text, and show that our pipeline produces output with over 80{\%} precision and 23{\%} recall with respect to that target, a substantially higher coverage of tuple-expressible science knowledge than other comparable resources. We have made the KB publicly available.},
}
Markdown (Informal)
[Domain-Targeted, High Precision Knowledge Extraction](https://aclanthology.org/Q17-1017/) (Dalvi Mishra et al., TACL 2017)
ACL