@inproceedings{yarlott-etal-2018-identifying,
title = "Identifying the Discourse Function of News Article Paragraphs",
author = "Yarlott, W. Victor and
Cornelio, Cristina and
Gao, Tian and
Finlayson, Mark",
booktitle = "Proceedings of the Workshop Events and Stories in the News 2018",
month = aug,
year = "2018",
address = "Santa Fe, New Mexico, U.S.A",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W18-4304",
pages = "25--33",
abstract = "Discourse structure is a key aspect of all forms of text, providing valuable information both to humans and machines. We applied the hierarchical theory of news discourse developed by van Dijk to examine how paragraphs operate as units of discourse structure within news articles{---}what we refer to here as document-level discourse. This document-level discourse provides a characterization of the content of each paragraph that describes its relation to the events presented in the article (such as main events, backgrounds, and consequences) as well as to other components of the story (such as commentary and evaluation). The purpose of a news discourse section is of great utility to story understanding as it affects both the importance and temporal order of items introduced in the text{---}therefore, if we know the news discourse purpose for different sections, we should be able to better rank events for their importance and better construct timelines. We test two hypotheses: first, that people can reliably annotate news articles with van Dijk{'}s theory; second, that we can reliably predict these labels using machine learning. We show that people have a high degree of agreement with each other when annotating the theory (F1 {\textgreater} 0.8, Cohen{'}s kappa {\textgreater} 0.6), demonstrating that it can be both learned and reliably applied by human annotators. Additionally, we demonstrate first steps toward machine learning of the theory, achieving a performance of F1 = 0.54, which is 65{\%} of human performance. Moreover, we have generated a gold-standard, adjudicated corpus of 50 documents for document-level discourse annotation based on the ACE Phase 2 corpus.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yarlott-etal-2018-identifying">
<titleInfo>
<title>Identifying the Discourse Function of News Article Paragraphs</title>
</titleInfo>
<name type="personal">
<namePart type="given">W</namePart>
<namePart type="given">Victor</namePart>
<namePart type="family">Yarlott</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cristina</namePart>
<namePart type="family">Cornelio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tian</namePart>
<namePart type="family">Gao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Finlayson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-aug</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop Events and Stories in the News 2018</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Santa Fe, New Mexico, U.S.A</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Discourse structure is a key aspect of all forms of text, providing valuable information both to humans and machines. We applied the hierarchical theory of news discourse developed by van Dijk to examine how paragraphs operate as units of discourse structure within news articles—what we refer to here as document-level discourse. This document-level discourse provides a characterization of the content of each paragraph that describes its relation to the events presented in the article (such as main events, backgrounds, and consequences) as well as to other components of the story (such as commentary and evaluation). The purpose of a news discourse section is of great utility to story understanding as it affects both the importance and temporal order of items introduced in the text—therefore, if we know the news discourse purpose for different sections, we should be able to better rank events for their importance and better construct timelines. We test two hypotheses: first, that people can reliably annotate news articles with van Dijk’s theory; second, that we can reliably predict these labels using machine learning. We show that people have a high degree of agreement with each other when annotating the theory (F1 \textgreater 0.8, Cohen’s kappa \textgreater 0.6), demonstrating that it can be both learned and reliably applied by human annotators. Additionally, we demonstrate first steps toward machine learning of the theory, achieving a performance of F1 = 0.54, which is 65% of human performance. Moreover, we have generated a gold-standard, adjudicated corpus of 50 documents for document-level discourse annotation based on the ACE Phase 2 corpus.</abstract>
<identifier type="citekey">yarlott-etal-2018-identifying</identifier>
<location>
<url>https://aclanthology.org/W18-4304</url>
</location>
<part>
<date>2018-aug</date>
<extent unit="page">
<start>25</start>
<end>33</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Identifying the Discourse Function of News Article Paragraphs
%A Yarlott, W. Victor
%A Cornelio, Cristina
%A Gao, Tian
%A Finlayson, Mark
%S Proceedings of the Workshop Events and Stories in the News 2018
%D 2018
%8 aug
%I Association for Computational Linguistics
%C Santa Fe, New Mexico, U.S.A
%F yarlott-etal-2018-identifying
%X Discourse structure is a key aspect of all forms of text, providing valuable information both to humans and machines. We applied the hierarchical theory of news discourse developed by van Dijk to examine how paragraphs operate as units of discourse structure within news articles—what we refer to here as document-level discourse. This document-level discourse provides a characterization of the content of each paragraph that describes its relation to the events presented in the article (such as main events, backgrounds, and consequences) as well as to other components of the story (such as commentary and evaluation). The purpose of a news discourse section is of great utility to story understanding as it affects both the importance and temporal order of items introduced in the text—therefore, if we know the news discourse purpose for different sections, we should be able to better rank events for their importance and better construct timelines. We test two hypotheses: first, that people can reliably annotate news articles with van Dijk’s theory; second, that we can reliably predict these labels using machine learning. We show that people have a high degree of agreement with each other when annotating the theory (F1 \textgreater 0.8, Cohen’s kappa \textgreater 0.6), demonstrating that it can be both learned and reliably applied by human annotators. Additionally, we demonstrate first steps toward machine learning of the theory, achieving a performance of F1 = 0.54, which is 65% of human performance. Moreover, we have generated a gold-standard, adjudicated corpus of 50 documents for document-level discourse annotation based on the ACE Phase 2 corpus.
%U https://aclanthology.org/W18-4304
%P 25-33
Markdown (Informal)
[Identifying the Discourse Function of News Article Paragraphs](https://aclanthology.org/W18-4304) (Yarlott et al., 2018)
ACL