% ScienceExamCER corpus paper (Smith et al., LREC 2020), pages 4529--4546 of the proceedings.
% The url field uses the canonical ACL Anthology address for paper 2020.lrec-1.558.
@inproceedings{smith-etal-2020-scienceexamcer,
  title     = {{ScienceExamCER}: A High-Density Fine-Grained Science-Domain Corpus for Common Entity Recognition},
  author    = {Smith, Hannah and
               Zhang, Zeyu and
               Culnan, John and
               Jansen, Peter},
  editor    = {Calzolari, Nicoletta and
               B{\'e}chet, Fr{\'e}d{\'e}ric and
               Blache, Philippe and
               Choukri, Khalid and
               Cieri, Christopher and
               Declerck, Thierry and
               Goggi, Sara and
               Isahara, Hitoshi and
               Maegaard, Bente and
               Mariani, Joseph and
               Mazo, H{\'e}l{\`e}ne and
               Moreno, Asuncion and
               Odijk, Jan and
               Piperidis, Stelios},
  booktitle = {Proceedings of the Twelfth Language Resources and Evaluation Conference},
  month     = may,
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  url       = {https://aclanthology.org/2020.lrec-1.558/},
  pages     = {4529--4546},
  language  = {eng},
  isbn      = {979-10-95546-34-4},
  abstract  = {Named entity recognition identifies common classes of entities in text, but these entity labels are generally sparse, limiting utility to downstream tasks. In this work we present ScienceExamCER, a densely-labeled semantic classification corpus of 133k mentions in the science exam domain where nearly all (96{\%}) of content words have been annotated with one or more fine-grained semantic class labels including taxonomic groups, meronym groups, verb/action groups, properties and values, and synonyms. Semantic class labels are drawn from a manually-constructed fine-grained typology of 601 classes generated through a data-driven analysis of 4,239 science exam questions. We show an off-the-shelf BERT-based named entity recognition model modified for multi-label classification achieves an accuracy of 0.85 F1 on this task, suggesting strong utility for downstream tasks in science domain question answering requiring densely-labeled semantic classification.},
}
Markdown (Informal)
[ScienceExamCER: A High-Density Fine-Grained Science-Domain Corpus for Common Entity Recognition](https://aclanthology.org/2020.lrec-1.558/) (Smith et al., LREC 2020)
ACL