@inproceedings{jimenez-gutierrez-etal-2020-document-classification,
title = "Document Classification for {COVID-19} Literature",
author = "Jim{\'e}nez Guti{\'e}rrez, Bernal and
Zeng, Juncheng and
Zhang, Dongdong and
Zhang, Ping and
Su, Yu",
booktitle = "Proceedings of the 1st Workshop on {NLP} for {COVID-19} at {ACL} 2020",
month = jul,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.nlpcovid19-acl.3",
abstract = "The global pandemic has made it more important than ever to quickly and accurately retrieve relevant scientific literature for effective consumption by researchers in a wide range of fields. We provide an analysis of several multi-label document classification models on the LitCovid dataset. We find that pre-trained language models outperform other models in both low and high data regimes, achieving a maximum F1 score of around 86{\%}. We note that even the highest performing models still struggle with label correlation, distraction from introductory text and CORD-19 generalization. Both data and code are available on GitHub.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jimenez-gutierrez-etal-2020-document-classification">
<titleInfo>
<title>Document Classification for COVID-19 Literature</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bernal</namePart>
<namePart type="family">Jiménez Gutiérrez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juncheng</namePart>
<namePart type="family">Zeng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dongdong</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ping</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yu</namePart>
<namePart type="family">Su</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued encoding="w3cdtf">2020-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on NLP for COVID-19 at ACL 2020</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The global pandemic has made it more important than ever to quickly and accurately retrieve relevant scientific literature for effective consumption by researchers in a wide range of fields. We provide an analysis of several multi-label document classification models on the LitCovid dataset. We find that pre-trained language models outperform other models in both low and high data regimes, achieving a maximum F1 score of around 86%. We note that even the highest performing models still struggle with label correlation, distraction from introductory text and CORD-19 generalization. Both data and code are available on GitHub.</abstract>
<identifier type="citekey">jimenez-gutierrez-etal-2020-document-classification</identifier>
<location>
<url>https://aclanthology.org/2020.nlpcovid19-acl.3</url>
</location>
<part>
<date encoding="w3cdtf">2020-07</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Document Classification for COVID-19 Literature
%A Jiménez Gutiérrez, Bernal
%A Zeng, Juncheng
%A Zhang, Dongdong
%A Zhang, Ping
%A Su, Yu
%S Proceedings of the 1st Workshop on NLP for COVID-19 at ACL 2020
%D 2020
%8 jul
%I Association for Computational Linguistics
%C Online
%F jimenez-gutierrez-etal-2020-document-classification
%X The global pandemic has made it more important than ever to quickly and accurately retrieve relevant scientific literature for effective consumption by researchers in a wide range of fields. We provide an analysis of several multi-label document classification models on the LitCovid dataset. We find that pre-trained language models outperform other models in both low and high data regimes, achieving a maximum F1 score of around 86%. We note that even the highest performing models still struggle with label correlation, distraction from introductory text and CORD-19 generalization. Both data and code are available on GitHub.
%U https://aclanthology.org/2020.nlpcovid19-acl.3
Markdown (Informal)
[Document Classification for COVID-19 Literature](https://aclanthology.org/2020.nlpcovid19-acl.3) (Jiménez Gutiérrez et al., NLP-COVID19 2020)
ACL
- Bernal Jiménez Gutiérrez, Juncheng Zeng, Dongdong Zhang, Ping Zhang, and Yu Su. 2020. Document Classification for COVID-19 Literature. In Proceedings of the 1st Workshop on NLP for COVID-19 at ACL 2020, Online. Association for Computational Linguistics.