@inproceedings{yamaguchi-etal-2020-sc,
title = "{SC}-{C}o{MI}cs: A Superconductivity Corpus for Materials Informatics",
author = "Yamaguchi, Kyosuke and
Asahi, Ryoji and
Sasaki, Yutaka",
booktitle = "Proceedings of the 12th Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.834",
pages = "6753--6760",
abstract = "This paper describes a novel corpus tailored for the text mining of superconducting materials in Materials Informatics (MI), named SuperConductivety Corpus for Materials Informatics (SC-CoMIcs). Different from biomedical informatics, there exist very few corpora targeting Materials Science and Engineering (MSE). Especially, there is no sizable corpus which can be used to assist the search of superconducting materials. A team of materials scientists and natural language processing experts jointly designed the annotation and constructed a corpus consisting of manually-annotated 1,000 MSE abstracts related to superconductivity. We conducted experiments on the corpus with a neural Named Entity Recognition (NER) tool. The experimental results show that NER performance over the corpus is around 77{\%} in terms of micro-F1, which is comparable to human annotator agreement rates. Using the trained NER model, we automatically annotated 9,000 abstracts and created a term retrieval tool based on the term similarity. This tool can find superconductivity terms relevant to a query term within a specified Named Entity category, which demonstrates the power of our SC-CoMIcs, efficiently providing knowledge for Materials Informatics applications from rapidly expanding publications.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yamaguchi-etal-2020-sc">
<titleInfo>
<title>SC-CoMIcs: A Superconductivity Corpus for Materials Informatics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kyosuke</namePart>
<namePart type="family">Yamaguchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryoji</namePart>
<namePart type="family">Asahi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yutaka</namePart>
<namePart type="family">Sasaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-may</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 12th Language Resources and Evaluation Conference</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>This paper describes a novel corpus tailored for the text mining of superconducting materials in Materials Informatics (MI), named SuperConductivety Corpus for Materials Informatics (SC-CoMIcs). Different from biomedical informatics, there exist very few corpora targeting Materials Science and Engineering (MSE). Especially, there is no sizable corpus which can be used to assist the search of superconducting materials. A team of materials scientists and natural language processing experts jointly designed the annotation and constructed a corpus consisting of manually-annotated 1,000 MSE abstracts related to superconductivity. We conducted experiments on the corpus with a neural Named Entity Recognition (NER) tool. The experimental results show that NER performance over the corpus is around 77% in terms of micro-F1, which is comparable to human annotator agreement rates. Using the trained NER model, we automatically annotated 9,000 abstracts and created a term retrieval tool based on the term similarity. This tool can find superconductivity terms relevant to a query term within a specified Named Entity category, which demonstrates the power of our SC-CoMIcs, efficiently providing knowledge for Materials Informatics applications from rapidly expanding publications.</abstract>
<identifier type="citekey">yamaguchi-etal-2020-sc</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.834</url>
</location>
<part>
<date>2020-may</date>
<extent unit="page">
<start>6753</start>
<end>6760</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SC-CoMIcs: A Superconductivity Corpus for Materials Informatics
%A Yamaguchi, Kyosuke
%A Asahi, Ryoji
%A Sasaki, Yutaka
%S Proceedings of the 12th Language Resources and Evaluation Conference
%D 2020
%8 may
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F yamaguchi-etal-2020-sc
%X This paper describes a novel corpus tailored for the text mining of superconducting materials in Materials Informatics (MI), named SuperConductivety Corpus for Materials Informatics (SC-CoMIcs). Different from biomedical informatics, there exist very few corpora targeting Materials Science and Engineering (MSE). Especially, there is no sizable corpus which can be used to assist the search of superconducting materials. A team of materials scientists and natural language processing experts jointly designed the annotation and constructed a corpus consisting of manually-annotated 1,000 MSE abstracts related to superconductivity. We conducted experiments on the corpus with a neural Named Entity Recognition (NER) tool. The experimental results show that NER performance over the corpus is around 77% in terms of micro-F1, which is comparable to human annotator agreement rates. Using the trained NER model, we automatically annotated 9,000 abstracts and created a term retrieval tool based on the term similarity. This tool can find superconductivity terms relevant to a query term within a specified Named Entity category, which demonstrates the power of our SC-CoMIcs, efficiently providing knowledge for Materials Informatics applications from rapidly expanding publications.
%U https://aclanthology.org/2020.lrec-1.834
%P 6753-6760
Markdown (Informal)
[SC-CoMIcs: A Superconductivity Corpus for Materials Informatics](https://aclanthology.org/2020.lrec-1.834) (Yamaguchi et al., LREC 2020)
ACL