@inproceedings{pant-etal-2019-smokeng,
title = "{S}mok{E}ng: Towards Fine-grained Classification of Tobacco-related Social Media Text",
author = "Pant, Kartikey and
Yanamandra, Venkata Himakar and
Debnath, Alok and
Mamidi, Radhika",
booktitle = "Proceedings of the 5th Workshop on Noisy User-generated Text (W-NUT 2019)",
month = nov,
year = "2019",
address = "Hong Kong, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D19-5524",
doi = "10.18653/v1/D19-5524",
pages = "181--190",
abstract = "Contemporary datasets on tobacco consumption focus on one of two topics, either public health mentions and disease surveillance, or sentiment analysis on topical tobacco products and services. However, two primary considerations are not accounted for, the language of the demographic affected and a combination of the topics mentioned above in a fine-grained classification mechanism. In this paper, we create a dataset of 3144 tweets, which are selected based on the presence of colloquial slang related to smoking and analyze it based on the semantics of the tweet. Each class is created and annotated based on the content of the tweets such that further hierarchical methods can be easily applied. Further, we prove the efficacy of standard text classification methods on this dataset, by designing experiments which do both binary as well as multi-class classification. Our experiments tackle the identification of either a specific topic (such as tobacco product promotion), a general mention (cigarettes and related products) or a more fine-grained classification. This methodology paves the way for further analysis, such as understanding sentiment or style, which makes this dataset a vital contribution to both disease surveillance and tobacco use research.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pant-etal-2019-smokeng">
<titleInfo>
<title>SmokEng: Towards Fine-grained Classification of Tobacco-related Social Media Text</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kartikey</namePart>
<namePart type="family">Pant</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Venkata</namePart>
<namePart type="given">Himakar</namePart>
<namePart type="family">Yanamandra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alok</namePart>
<namePart type="family">Debnath</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Radhika</namePart>
<namePart type="family">Mamidi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-nov</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 5th Workshop on Noisy User-generated Text (W-NUT 2019)</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hong Kong, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Contemporary datasets on tobacco consumption focus on one of two topics, either public health mentions and disease surveillance, or sentiment analysis on topical tobacco products and services. However, two primary considerations are not accounted for, the language of the demographic affected and a combination of the topics mentioned above in a fine-grained classification mechanism. In this paper, we create a dataset of 3144 tweets, which are selected based on the presence of colloquial slang related to smoking and analyze it based on the semantics of the tweet. Each class is created and annotated based on the content of the tweets such that further hierarchical methods can be easily applied. Further, we prove the efficacy of standard text classification methods on this dataset, by designing experiments which do both binary as well as multi-class classification. Our experiments tackle the identification of either a specific topic (such as tobacco product promotion), a general mention (cigarettes and related products) or a more fine-grained classification. This methodology paves the way for further analysis, such as understanding sentiment or style, which makes this dataset a vital contribution to both disease surveillance and tobacco use research.</abstract>
<identifier type="citekey">pant-etal-2019-smokeng</identifier>
<identifier type="doi">10.18653/v1/D19-5524</identifier>
<location>
<url>https://aclanthology.org/D19-5524</url>
</location>
<part>
<date>2019-nov</date>
<extent unit="page">
<start>181</start>
<end>190</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SmokEng: Towards Fine-grained Classification of Tobacco-related Social Media Text
%A Pant, Kartikey
%A Yanamandra, Venkata Himakar
%A Debnath, Alok
%A Mamidi, Radhika
%S Proceedings of the 5th Workshop on Noisy User-generated Text (W-NUT 2019)
%D 2019
%8 nov
%I Association for Computational Linguistics
%C Hong Kong, China
%F pant-etal-2019-smokeng
%X Contemporary datasets on tobacco consumption focus on one of two topics, either public health mentions and disease surveillance, or sentiment analysis on topical tobacco products and services. However, two primary considerations are not accounted for, the language of the demographic affected and a combination of the topics mentioned above in a fine-grained classification mechanism. In this paper, we create a dataset of 3144 tweets, which are selected based on the presence of colloquial slang related to smoking and analyze it based on the semantics of the tweet. Each class is created and annotated based on the content of the tweets such that further hierarchical methods can be easily applied. Further, we prove the efficacy of standard text classification methods on this dataset, by designing experiments which do both binary as well as multi-class classification. Our experiments tackle the identification of either a specific topic (such as tobacco product promotion), a general mention (cigarettes and related products) or a more fine-grained classification. This methodology paves the way for further analysis, such as understanding sentiment or style, which makes this dataset a vital contribution to both disease surveillance and tobacco use research.
%R 10.18653/v1/D19-5524
%U https://aclanthology.org/D19-5524
%U https://doi.org/10.18653/v1/D19-5524
%P 181-190
Markdown (Informal)
[SmokEng: Towards Fine-grained Classification of Tobacco-related Social Media Text](https://aclanthology.org/D19-5524) (Pant et al., EMNLP 2019)
ACL