@inproceedings{kornilova-eidelman-2019-billsum,
title = "{B}ill{S}um: A Corpus for Automatic Summarization of {US} Legislation",
author = "Kornilova, Anastassia and
Eidelman, Vladimir",
booktitle = "Proceedings of the 2nd Workshop on New Frontiers in Summarization",
month = nov,
year = "2019",
address = "Hong Kong, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D19-5406",
doi = "10.18653/v1/D19-5406",
pages = "48--56",
abstract = "Automatic summarization methods have been studied on a variety of domains, including news and scientific articles. Yet, legislation has not previously been considered for this task, despite US Congress and state governments releasing tens of thousands of bills every year. In this paper, we introduce BillSum, the first dataset for summarization of US Congressional and California state bills. We explain the properties of the dataset that make it more challenging to process than other domains. Then, we benchmark extractive methods that consider neural sentence representations and traditional contextual features. Finally, we demonstrate that models built on Congressional bills can be used to summarize California bills, thus, showing that methods developed on this dataset can transfer to states without human-written summaries.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kornilova-eidelman-2019-billsum">
<titleInfo>
<title>BillSum: A Corpus for Automatic Summarization of US Legislation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anastassia</namePart>
<namePart type="family">Kornilova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vladimir</namePart>
<namePart type="family">Eidelman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-nov</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Workshop on New Frontiers in Summarization</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hong Kong, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Automatic summarization methods have been studied on a variety of domains, including news and scientific articles. Yet, legislation has not previously been considered for this task, despite US Congress and state governments releasing tens of thousands of bills every year. In this paper, we introduce BillSum, the first dataset for summarization of US Congressional and California state bills. We explain the properties of the dataset that make it more challenging to process than other domains. Then, we benchmark extractive methods that consider neural sentence representations and traditional contextual features. Finally, we demonstrate that models built on Congressional bills can be used to summarize California bills, thus, showing that methods developed on this dataset can transfer to states without human-written summaries.</abstract>
<identifier type="citekey">kornilova-eidelman-2019-billsum</identifier>
<identifier type="doi">10.18653/v1/D19-5406</identifier>
<location>
<url>https://aclanthology.org/D19-5406</url>
</location>
<part>
<date>2019-nov</date>
<extent unit="page">
<start>48</start>
<end>56</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T BillSum: A Corpus for Automatic Summarization of US Legislation
%A Kornilova, Anastassia
%A Eidelman, Vladimir
%S Proceedings of the 2nd Workshop on New Frontiers in Summarization
%D 2019
%8 nov
%I Association for Computational Linguistics
%C Hong Kong, China
%F kornilova-eidelman-2019-billsum
%X Automatic summarization methods have been studied on a variety of domains, including news and scientific articles. Yet, legislation has not previously been considered for this task, despite US Congress and state governments releasing tens of thousands of bills every year. In this paper, we introduce BillSum, the first dataset for summarization of US Congressional and California state bills. We explain the properties of the dataset that make it more challenging to process than other domains. Then, we benchmark extractive methods that consider neural sentence representations and traditional contextual features. Finally, we demonstrate that models built on Congressional bills can be used to summarize California bills, thus, showing that methods developed on this dataset can transfer to states without human-written summaries.
%R 10.18653/v1/D19-5406
%U https://aclanthology.org/D19-5406
%U https://doi.org/10.18653/v1/D19-5406
%P 48-56
Markdown (Informal)
[BillSum: A Corpus for Automatic Summarization of US Legislation](https://aclanthology.org/D19-5406) (Kornilova & Eidelman, EMNLP 2019)
ACL