@comment{xu-etal-2020-matinf: conference paper introducing the MATINF dataset,
  published in the ACL 2020 proceedings. The url field holds the canonical
  ACL Anthology address for the paper rather than a preview-branch link.}
@inproceedings{xu-etal-2020-matinf,
  title     = {{MATINF}: A Jointly Labeled Large-Scale Dataset for Classification, Question Answering and Summarization},
  author    = {Xu, Canwen and
               Pei, Jiaxin and
               Wu, Hongtao and
               Liu, Yiyu and
               Li, Chenliang},
  editor    = {Jurafsky, Dan and
               Chai, Joyce and
               Schluter, Natalie and
               Tetreault, Joel},
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
  month     = jul,
  year      = {2020},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2020.acl-main.330/},
  doi       = {10.18653/v1/2020.acl-main.330},
  pages     = {3586--3596},
  abstract  = {Recently, large-scale datasets have vastly facilitated the development in nearly all domains of Natural Language Processing. However, there is currently no cross-task dataset in NLP, which hinders the development of multi-task learning. We propose MATINF, the first jointly labeled large-scale dataset for classification, question answering and summarization. MATINF contains 1.07 million question-answer pairs with human-labeled categories and user-generated question descriptions. Based on such rich information, MATINF is applicable for three major NLP tasks, including classification, question answering, and summarization. We benchmark existing methods and a novel multi-task baseline over MATINF to inspire further research. Our comprehensive comparison and experiments over MATINF and other datasets demonstrate the merits held by MATINF.}
}
Markdown (Informal)
[MATINF: A Jointly Labeled Large-Scale Dataset for Classification, Question Answering and Summarization](https://aclanthology.org/2020.acl-main.330/) (Xu et al., ACL 2020)
ACL