@inproceedings{grimm-cimiano-2021-biquad,
title = "{B}i{Q}u{AD}: Towards {QA} based on deeper text understanding",
author = "Grimm, Frank and
Cimiano, Philipp",
booktitle = "Proceedings of *SEM 2021: The Tenth Joint Conference on Lexical and Computational Semantics",
month = aug,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.starsem-1.10",
doi = "10.18653/v1/2021.starsem-1.10",
pages = "105--115",
abstract = "Recent question answering and machine reading benchmarks frequently reduce the task to one of pinpointing spans within a certain text passage that answers the given question. Typically, these systems are not required to actually understand the text on a deeper level that allows for more complex reasoning on the information contained. We introduce a new dataset called BiQuAD that requires deeper comprehension in order to answer questions in both extractive and deductive fashion. The dataset consist of 4,190 closed-domain texts and a total of 99,149 question-answer pairs. The texts are synthetically generated soccer match reports that verbalize the main events of each match. All texts are accompanied by a structured Datalog program that represents a (logical) model of its information. We show that state-of-the-art QA models do not perform well on the challenging long form contexts and reasoning requirements posed by the dataset. In particular, transformer based state-of-the-art models achieve F1-scores of only 39.0. We demonstrate how these synthetic datasets align structured knowledge with natural text and aid model introspection when approaching complex text understanding.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="grimm-cimiano-2021-biquad">
<titleInfo>
<title>BiQuAD: Towards QA based on deeper text understanding</title>
</titleInfo>
<name type="personal">
<namePart type="given">Frank</namePart>
<namePart type="family">Grimm</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Cimiano</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-aug</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of *SEM 2021: The Tenth Joint Conference on Lexical and Computational Semantics</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Recent question answering and machine reading benchmarks frequently reduce the task to one of pinpointing spans within a certain text passage that answers the given question. Typically, these systems are not required to actually understand the text on a deeper level that allows for more complex reasoning on the information contained. We introduce a new dataset called BiQuAD that requires deeper comprehension in order to answer questions in both extractive and deductive fashion. The dataset consist of 4,190 closed-domain texts and a total of 99,149 question-answer pairs. The texts are synthetically generated soccer match reports that verbalize the main events of each match. All texts are accompanied by a structured Datalog program that represents a (logical) model of its information. We show that state-of-the-art QA models do not perform well on the challenging long form contexts and reasoning requirements posed by the dataset. In particular, transformer based state-of-the-art models achieve F1-scores of only 39.0. We demonstrate how these synthetic datasets align structured knowledge with natural text and aid model introspection when approaching complex text understanding.</abstract>
<identifier type="citekey">grimm-cimiano-2021-biquad</identifier>
<identifier type="doi">10.18653/v1/2021.starsem-1.10</identifier>
<location>
<url>https://aclanthology.org/2021.starsem-1.10</url>
</location>
<part>
<date>2021-aug</date>
<extent unit="page">
<start>105</start>
<end>115</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T BiQuAD: Towards QA based on deeper text understanding
%A Grimm, Frank
%A Cimiano, Philipp
%S Proceedings of *SEM 2021: The Tenth Joint Conference on Lexical and Computational Semantics
%D 2021
%8 aug
%I Association for Computational Linguistics
%C Online
%F grimm-cimiano-2021-biquad
%X Recent question answering and machine reading benchmarks frequently reduce the task to one of pinpointing spans within a certain text passage that answers the given question. Typically, these systems are not required to actually understand the text on a deeper level that allows for more complex reasoning on the information contained. We introduce a new dataset called BiQuAD that requires deeper comprehension in order to answer questions in both extractive and deductive fashion. The dataset consist of 4,190 closed-domain texts and a total of 99,149 question-answer pairs. The texts are synthetically generated soccer match reports that verbalize the main events of each match. All texts are accompanied by a structured Datalog program that represents a (logical) model of its information. We show that state-of-the-art QA models do not perform well on the challenging long form contexts and reasoning requirements posed by the dataset. In particular, transformer based state-of-the-art models achieve F1-scores of only 39.0. We demonstrate how these synthetic datasets align structured knowledge with natural text and aid model introspection when approaching complex text understanding.
%R 10.18653/v1/2021.starsem-1.10
%U https://aclanthology.org/2021.starsem-1.10
%U https://doi.org/10.18653/v1/2021.starsem-1.10
%P 105-115
Markdown (Informal)
[BiQuAD: Towards QA based on deeper text understanding](https://aclanthology.org/2021.starsem-1.10) (Grimm & Cimiano, *SEM 2021)
ACL