% Du and Cardie, ACL 2018 (Long Papers), Anthology ID P18-1177.
% The url field uses the canonical ACL Anthology address rather than a
% temporary preview-build link; the abstract is kept verbatim from the export.
@inproceedings{du-cardie-2018-harvesting,
  title     = {Harvesting Paragraph-level Question-Answer Pairs from {Wikipedia}},
  author    = {Du, Xinya and
               Cardie, Claire},
  editor    = {Gurevych, Iryna and
               Miyao, Yusuke},
  booktitle = {Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2018},
  address   = {Melbourne, Australia},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/P18-1177/},
  doi       = {10.18653/v1/P18-1177},
  pages     = {1907--1917},
  abstract  = {We study the task of generating from Wikipedia articles question-answer pairs that cover content beyond a single sentence. We propose a neural network approach that incorporates coreference knowledge via a novel gating mechanism. As compared to models that only take into account sentence-level information (Heilman and Smith, 2010; Du et al., 2017; Zhou et al., 2017), we find that the linguistic knowledge introduced by the coreference representation aids question generation significantly, producing models that outperform the current state-of-the-art. We apply our system (composed of an answer span extraction system and the passage-level QG system) to the 10,000 top ranking Wikipedia articles and create a corpus of over one million question-answer pairs. We provide qualitative analysis for the this large-scale generated corpus from Wikipedia.},
}
Markdown (Informal)
[Harvesting Paragraph-level Question-Answer Pairs from Wikipedia](https://aclanthology.org/P18-1177/) (Du & Cardie, ACL 2018)
ACL