@inproceedings{zhong-etal-2019-closer,
title = "A Closer Look at Data Bias in Neural Extractive Summarization Models",
author = "Zhong, Ming and
Wang, Danqing and
Liu, Pengfei and
Qiu, Xipeng and
Huang, Xuanjing",
booktitle = "Proceedings of the 2nd Workshop on New Frontiers in Summarization",
month = nov,
year = "2019",
address = "Hong Kong, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D19-5410",
doi = "10.18653/v1/D19-5410",
pages = "80--89",
abstract = "In this paper, we take stock of the current state of summarization datasets and explore how different factors of datasets influence the generalization behaviour of neural extractive summarization models. Specifically, we first propose several properties of datasets, which matter for the generalization of summarization models. Then we build the connection between priors residing in datasets and model designs, analyzing how different properties of datasets influence the choices of model structure design and training methods. Finally, by taking a typical dataset as an example, we rethink the process of the model design based on the experience of the above analysis. We demonstrate that when we have a deep understanding of the characteristics of datasets, a simple approach can bring significant improvements to the existing state-of-the-art model.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhong-etal-2019-closer">
<titleInfo>
<title>A Closer Look at Data Bias in Neural Extractive Summarization Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ming</namePart>
<namePart type="family">Zhong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Danqing</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pengfei</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xipeng</namePart>
<namePart type="family">Qiu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xuanjing</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued encoding="w3cdtf">2019-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Workshop on New Frontiers in Summarization</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hong Kong, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we take stock of the current state of summarization datasets and explore how different factors of datasets influence the generalization behaviour of neural extractive summarization models. Specifically, we first propose several properties of datasets, which matter for the generalization of summarization models. Then we build the connection between priors residing in datasets and model designs, analyzing how different properties of datasets influence the choices of model structure design and training methods. Finally, by taking a typical dataset as an example, we rethink the process of the model design based on the experience of the above analysis. We demonstrate that when we have a deep understanding of the characteristics of datasets, a simple approach can bring significant improvements to the existing state-of-the-art model.</abstract>
<identifier type="citekey">zhong-etal-2019-closer</identifier>
<identifier type="doi">10.18653/v1/D19-5410</identifier>
<location>
<url>https://aclanthology.org/D19-5410</url>
</location>
<part>
<date>2019-11</date>
<extent unit="page">
<start>80</start>
<end>89</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Closer Look at Data Bias in Neural Extractive Summarization Models
%A Zhong, Ming
%A Wang, Danqing
%A Liu, Pengfei
%A Qiu, Xipeng
%A Huang, Xuanjing
%S Proceedings of the 2nd Workshop on New Frontiers in Summarization
%D 2019
%8 nov
%I Association for Computational Linguistics
%C Hong Kong, China
%F zhong-etal-2019-closer
%X In this paper, we take stock of the current state of summarization datasets and explore how different factors of datasets influence the generalization behaviour of neural extractive summarization models. Specifically, we first propose several properties of datasets, which matter for the generalization of summarization models. Then we build the connection between priors residing in datasets and model designs, analyzing how different properties of datasets influence the choices of model structure design and training methods. Finally, by taking a typical dataset as an example, we rethink the process of the model design based on the experience of the above analysis. We demonstrate that when we have a deep understanding of the characteristics of datasets, a simple approach can bring significant improvements to the existing state-of-the-art model.
%R 10.18653/v1/D19-5410
%U https://aclanthology.org/D19-5410
%U https://doi.org/10.18653/v1/D19-5410
%P 80-89
Markdown (Informal)
[A Closer Look at Data Bias in Neural Extractive Summarization Models](https://aclanthology.org/D19-5410) (Zhong et al., EMNLP 2019)
ACL