@inproceedings{gong-etal-2020-design,
title = "The Design and Construction of a {C}hinese Sarcasm Dataset",
author = "Gong, Xiaochang and
Zhao, Qin and
Zhang, Jun and
Mao, Ruibin and
Xu, Ruifeng",
booktitle = "Proceedings of the 12th Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.619",
pages = "5034--5039",
abstract = "As a typical multi-layered semi-conscious language phenomenon, sarcasm is widely existed in social media text for enhancing the emotion expression. Thus, the detection and processing of sarcasm is important to social media analysis.However, most existing sarcasm dataset are in English and there is still a lack of authoritative Chinese sarcasm dataset. In this paper, we presents the design and construction of a largest high-quality Chinese sarcasm dataset, which contains 2,486 manual annotated sarcastic texts and 89,296 non-sarcastic texts. Furthermore, a balanced dataset through elaborately sampling the same amount non-sarcastic texts for training sarcasm classifier. Using the dataset as the benchmark, some sarcasm classification methods are evaluated.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gong-etal-2020-design">
<titleInfo>
<title>The Design and Construction of a Chinese Sarcasm Dataset</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xiaochang</namePart>
<namePart type="family">Gong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qin</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruibin</namePart>
<namePart type="family">Mao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruifeng</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-may</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 12th Language Resources and Evaluation Conference</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>As a typical multi-layered semi-conscious language phenomenon, sarcasm is widely existed in social media text for enhancing the emotion expression. Thus, the detection and processing of sarcasm is important to social media analysis.However, most existing sarcasm dataset are in English and there is still a lack of authoritative Chinese sarcasm dataset. In this paper, we presents the design and construction of a largest high-quality Chinese sarcasm dataset, which contains 2,486 manual annotated sarcastic texts and 89,296 non-sarcastic texts. Furthermore, a balanced dataset through elaborately sampling the same amount non-sarcastic texts for training sarcasm classifier. Using the dataset as the benchmark, some sarcasm classification methods are evaluated.</abstract>
<identifier type="citekey">gong-etal-2020-design</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.619</url>
</location>
<part>
<date>2020-may</date>
<extent unit="page">
<start>5034</start>
<end>5039</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The Design and Construction of a Chinese Sarcasm Dataset
%A Gong, Xiaochang
%A Zhao, Qin
%A Zhang, Jun
%A Mao, Ruibin
%A Xu, Ruifeng
%S Proceedings of the 12th Language Resources and Evaluation Conference
%D 2020
%8 may
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F gong-etal-2020-design
%X As a typical multi-layered semi-conscious language phenomenon, sarcasm is widely existed in social media text for enhancing the emotion expression. Thus, the detection and processing of sarcasm is important to social media analysis.However, most existing sarcasm dataset are in English and there is still a lack of authoritative Chinese sarcasm dataset. In this paper, we presents the design and construction of a largest high-quality Chinese sarcasm dataset, which contains 2,486 manual annotated sarcastic texts and 89,296 non-sarcastic texts. Furthermore, a balanced dataset through elaborately sampling the same amount non-sarcastic texts for training sarcasm classifier. Using the dataset as the benchmark, some sarcasm classification methods are evaluated.
%U https://aclanthology.org/2020.lrec-1.619
%P 5034-5039
Markdown (Informal)
[The Design and Construction of a Chinese Sarcasm Dataset](https://aclanthology.org/2020.lrec-1.619) (Gong et al., LREC 2020)
ACL