@inproceedings{zhang-etal-2021-cough,
title = "{COUGH}: A Challenge Dataset and Models for {COVID}-19 {FAQ} Retrieval",
author = "Zhang, Xinliang Frederick and
Sun, Heming and
Yue, Xiang and
Lin, Simon and
Sun, Huan",
booktitle = "Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2021",
address = "Online and Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.emnlp-main.305",
doi = "10.18653/v1/2021.emnlp-main.305",
pages = "3759--3769",
abstract = "We present a large, challenging dataset, COUGH, for COVID-19 FAQ retrieval. Similar to a standard FAQ dataset, COUGH consists of three parts: FAQ Bank, Query Bank and Relevance Set. The FAQ Bank contains {\textasciitilde}16K FAQ items scraped from 55 credible websites (e.g., CDC and WHO). For evaluation, we introduce Query Bank and Relevance Set, where the former contains 1,236 human-paraphrased queries while the latter contains {\textasciitilde}32 human-annotated FAQ items for each query. We analyze COUGH by testing different FAQ retrieval models built on top of BM25 and BERT, among which the best model achieves 48.8 under P@5, indicating a great challenge presented by COUGH and encouraging future research for further improvement. Our COUGH dataset is available at https://github.com/sunlab-osu/covid-faq.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhang-etal-2021-cough">
<titleInfo>
<title>COUGH: A Challenge Dataset and Models for COVID-19 FAQ Retrieval</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xinliang</namePart>
<namePart type="given">Frederick</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heming</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiang</namePart>
<namePart type="family">Yue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Huan</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online and Punta Cana, Dominican Republic</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present a large, challenging dataset, COUGH, for COVID-19 FAQ retrieval. Similar to a standard FAQ dataset, COUGH consists of three parts: FAQ Bank, Query Bank and Relevance Set. The FAQ Bank contains ~16K FAQ items scraped from 55 credible websites (e.g., CDC and WHO). For evaluation, we introduce Query Bank and Relevance Set, where the former contains 1,236 human-paraphrased queries while the latter contains ~32 human-annotated FAQ items for each query. We analyze COUGH by testing different FAQ retrieval models built on top of BM25 and BERT, among which the best model achieves 48.8 under P@5, indicating a great challenge presented by COUGH and encouraging future research for further improvement. Our COUGH dataset is available at https://github.com/sunlab-osu/covid-faq.</abstract>
<identifier type="citekey">zhang-etal-2021-cough</identifier>
<identifier type="doi">10.18653/v1/2021.emnlp-main.305</identifier>
<location>
<url>https://aclanthology.org/2021.emnlp-main.305</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>3759</start>
<end>3769</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T COUGH: A Challenge Dataset and Models for COVID-19 FAQ Retrieval
%A Zhang, Xinliang Frederick
%A Sun, Heming
%A Yue, Xiang
%A Lin, Simon
%A Sun, Huan
%S Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing
%D 2021
%8 nov
%I Association for Computational Linguistics
%C Online and Punta Cana, Dominican Republic
%F zhang-etal-2021-cough
%X We present a large, challenging dataset, COUGH, for COVID-19 FAQ retrieval. Similar to a standard FAQ dataset, COUGH consists of three parts: FAQ Bank, Query Bank and Relevance Set. The FAQ Bank contains ~16K FAQ items scraped from 55 credible websites (e.g., CDC and WHO). For evaluation, we introduce Query Bank and Relevance Set, where the former contains 1,236 human-paraphrased queries while the latter contains ~32 human-annotated FAQ items for each query. We analyze COUGH by testing different FAQ retrieval models built on top of BM25 and BERT, among which the best model achieves 48.8 under P@5, indicating a great challenge presented by COUGH and encouraging future research for further improvement. Our COUGH dataset is available at https://github.com/sunlab-osu/covid-faq.
%R 10.18653/v1/2021.emnlp-main.305
%U https://aclanthology.org/2021.emnlp-main.305
%U https://doi.org/10.18653/v1/2021.emnlp-main.305
%P 3759-3769
Markdown (Informal)
[COUGH: A Challenge Dataset and Models for COVID-19 FAQ Retrieval](https://aclanthology.org/2021.emnlp-main.305) (Zhang et al., EMNLP 2021)
ACL
- Xinliang Frederick Zhang, Heming Sun, Xiang Yue, Simon Lin, and Huan Sun. 2021. COUGH: A Challenge Dataset and Models for COVID-19 FAQ Retrieval. In Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, pages 3759–3769, Online and Punta Cana, Dominican Republic. Association for Computational Linguistics.