@inproceedings{iwatsuki-etal-2020-evaluation,
title = "An Evaluation Dataset for Identifying Communicative Functions of Sentences in {E}nglish Scholarly Papers",
author = "Iwatsuki, Kenichi and
Boudin, Florian and
Aizawa, Akiko",
booktitle = "Proceedings of the 12th Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.212",
pages = "1712--1720",
abstract = "Formulaic expressions, such as {`}in this paper we propose{'}, are used by authors of scholarly papers to perform communicative functions; the communicative function of the present example is {`}stating the aim of the paper{'}. Collecting such expressions and pairing them with their communicative functions would be highly valuable for various tasks, particularly for writing assistance. However, such collection and paring in a principled and automated manner would require high-quality annotated data, which are not available. In this study, we address this shortcoming by creating a manually annotated dataset for detecting communicative functions in sentences. Starting from a seed list of labelled formulaic expressions, we retrieved new sentences from scholarly papers in the ACL Anthology and asked multiple human evaluators to label communicative functions. To show the usefulness of our dataset, we conducted a series of experiments that determined to what extent sentence representations acquired by recent models, such as word2vec and BERT, can be employed to detect communicative functions in sentences.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="iwatsuki-etal-2020-evaluation">
<titleInfo>
<title>An Evaluation Dataset for Identifying Communicative Functions of Sentences in English Scholarly Papers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kenichi</namePart>
<namePart type="family">Iwatsuki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Florian</namePart>
<namePart type="family">Boudin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Akiko</namePart>
<namePart type="family">Aizawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-may</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 12th Language Resources and Evaluation Conference</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>Formulaic expressions, such as ‘in this paper we propose’, are used by authors of scholarly papers to perform communicative functions; the communicative function of the present example is ‘stating the aim of the paper’. Collecting such expressions and pairing them with their communicative functions would be highly valuable for various tasks, particularly for writing assistance. However, such collection and paring in a principled and automated manner would require high-quality annotated data, which are not available. In this study, we address this shortcoming by creating a manually annotated dataset for detecting communicative functions in sentences. Starting from a seed list of labelled formulaic expressions, we retrieved new sentences from scholarly papers in the ACL Anthology and asked multiple human evaluators to label communicative functions. To show the usefulness of our dataset, we conducted a series of experiments that determined to what extent sentence representations acquired by recent models, such as word2vec and BERT, can be employed to detect communicative functions in sentences.</abstract>
<identifier type="citekey">iwatsuki-etal-2020-evaluation</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.212</url>
</location>
<part>
<date>2020-may</date>
<extent unit="page">
<start>1712</start>
<end>1720</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T An Evaluation Dataset for Identifying Communicative Functions of Sentences in English Scholarly Papers
%A Iwatsuki, Kenichi
%A Boudin, Florian
%A Aizawa, Akiko
%S Proceedings of the 12th Language Resources and Evaluation Conference
%D 2020
%8 may
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F iwatsuki-etal-2020-evaluation
%X Formulaic expressions, such as ‘in this paper we propose’, are used by authors of scholarly papers to perform communicative functions; the communicative function of the present example is ‘stating the aim of the paper’. Collecting such expressions and pairing them with their communicative functions would be highly valuable for various tasks, particularly for writing assistance. However, such collection and paring in a principled and automated manner would require high-quality annotated data, which are not available. In this study, we address this shortcoming by creating a manually annotated dataset for detecting communicative functions in sentences. Starting from a seed list of labelled formulaic expressions, we retrieved new sentences from scholarly papers in the ACL Anthology and asked multiple human evaluators to label communicative functions. To show the usefulness of our dataset, we conducted a series of experiments that determined to what extent sentence representations acquired by recent models, such as word2vec and BERT, can be employed to detect communicative functions in sentences.
%U https://aclanthology.org/2020.lrec-1.212
%P 1712-1720
Markdown (Informal)
[An Evaluation Dataset for Identifying Communicative Functions of Sentences in English Scholarly Papers](https://aclanthology.org/2020.lrec-1.212) (Iwatsuki et al., LREC 2020)
ACL