@inproceedings{ashrafi-asli-etal-2020-optimizing,
title = "Optimizing Annotation Effort Using Active Learning Strategies: A Sentiment Analysis Case Study in {P}ersian",
author = "Ashrafi Asli, Seyed Arad and
Sabeti, Behnam and
Majdabadi, Zahra and
Golazizian, Preni and
Fahmi, Reza and
Momenzadeh, Omid",
booktitle = "Proceedings of the 12th Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.348",
pages = "2855--2861",
abstract = "Deep learning models are the current State-of-the-art methodologies towards many real-world problems. However, they need a substantial amount of labeled data to be trained appropriately. Acquiring labeled data can be challenging in some particular domains or less-resourced languages. There are some practical solutions regarding these issues, such as Active Learning and Transfer Learning. Active learning{'}s idea is simple: let the model choose the samples for annotation instead of labeling the whole dataset. This method leads to a more efficient annotation process. Active Learning models can achieve the baseline performance (the accuracy of the model trained on the whole dataset), with a considerably lower amount of labeled data. Several active learning approaches are tested in this work, and their compatibility with Persian is examined using a brand-new sentiment analysis dataset that is also introduced in this work. MirasOpinion, which to our knowledge is the largest Persian sentiment analysis dataset, is crawled from a Persian e-commerce website and annotated using a crowd-sourcing policy. LDA sampling, which is an efficient Active Learning strategy using Topic Modeling, is proposed in this research. Active Learning Strategies have shown promising results in the Persian language, and LDA sampling showed a competitive performance compared to other approaches.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ashrafi-asli-etal-2020-optimizing">
<titleInfo>
<title>Optimizing Annotation Effort Using Active Learning Strategies: A Sentiment Analysis Case Study in Persian</title>
</titleInfo>
<name type="personal">
<namePart type="given">Seyed</namePart>
<namePart type="given">Arad</namePart>
<namePart type="family">Ashrafi Asli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Behnam</namePart>
<namePart type="family">Sabeti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zahra</namePart>
<namePart type="family">Majdabadi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preni</namePart>
<namePart type="family">Golazizian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Reza</namePart>
<namePart type="family">Fahmi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Omid</namePart>
<namePart type="family">Momenzadeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 12th Language Resources and Evaluation Conference</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>Deep learning models are the current State-of-the-art methodologies towards many real-world problems. However, they need a substantial amount of labeled data to be trained appropriately. Acquiring labeled data can be challenging in some particular domains or less-resourced languages. There are some practical solutions regarding these issues, such as Active Learning and Transfer Learning. Active learning’s idea is simple: let the model choose the samples for annotation instead of labeling the whole dataset. This method leads to a more efficient annotation process. Active Learning models can achieve the baseline performance (the accuracy of the model trained on the whole dataset), with a considerably lower amount of labeled data. Several active learning approaches are tested in this work, and their compatibility with Persian is examined using a brand-new sentiment analysis dataset that is also introduced in this work. MirasOpinion, which to our knowledge is the largest Persian sentiment analysis dataset, is crawled from a Persian e-commerce website and annotated using a crowd-sourcing policy. LDA sampling, which is an efficient Active Learning strategy using Topic Modeling, is proposed in this research. Active Learning Strategies have shown promising results in the Persian language, and LDA sampling showed a competitive performance compared to other approaches.</abstract>
<identifier type="citekey">ashrafi-asli-etal-2020-optimizing</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.348</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>2855</start>
<end>2861</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Optimizing Annotation Effort Using Active Learning Strategies: A Sentiment Analysis Case Study in Persian
%A Ashrafi Asli, Seyed Arad
%A Sabeti, Behnam
%A Majdabadi, Zahra
%A Golazizian, Preni
%A Fahmi, Reza
%A Momenzadeh, Omid
%S Proceedings of the 12th Language Resources and Evaluation Conference
%D 2020
%8 may
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F ashrafi-asli-etal-2020-optimizing
%X Deep learning models are the current State-of-the-art methodologies towards many real-world problems. However, they need a substantial amount of labeled data to be trained appropriately. Acquiring labeled data can be challenging in some particular domains or less-resourced languages. There are some practical solutions regarding these issues, such as Active Learning and Transfer Learning. Active learning’s idea is simple: let the model choose the samples for annotation instead of labeling the whole dataset. This method leads to a more efficient annotation process. Active Learning models can achieve the baseline performance (the accuracy of the model trained on the whole dataset), with a considerably lower amount of labeled data. Several active learning approaches are tested in this work, and their compatibility with Persian is examined using a brand-new sentiment analysis dataset that is also introduced in this work. MirasOpinion, which to our knowledge is the largest Persian sentiment analysis dataset, is crawled from a Persian e-commerce website and annotated using a crowd-sourcing policy. LDA sampling, which is an efficient Active Learning strategy using Topic Modeling, is proposed in this research. Active Learning Strategies have shown promising results in the Persian language, and LDA sampling showed a competitive performance compared to other approaches.
%U https://aclanthology.org/2020.lrec-1.348
%P 2855-2861
Markdown (Informal)
[Optimizing Annotation Effort Using Active Learning Strategies: A Sentiment Analysis Case Study in Persian](https://aclanthology.org/2020.lrec-1.348) (Ashrafi Asli et al., LREC 2020)
ACL