@inproceedings{lichouri-etal-2021-arabic,
title = "{A}rabic Dialect Identification based on a Weighted Concatenation of {TF}-{IDF} Features",
author = "Lichouri, Mohamed and
Abbas, Mourad and
Lounnas, Khaled and
Benaziz, Besma and
Zitouni, Aicha",
booktitle = "Proceedings of the Sixth Arabic Natural Language Processing Workshop",
month = apr,
year = "2021",
address = "Kyiv, Ukraine (Virtual)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.wanlp-1.33",
pages = "282--286",
abstract = "In this paper, we analyze the impact of the weighted concatenation of TF-IDF features for the Arabic Dialect Identification task while we participated in the NADI2021 shared task. This study is performed for two subtasks: subtask 1.1 (country-level MSA) and subtask 1.2 (country-level DA) identification. The classifiers supporting our comparative study are Linear Support Vector Classification (LSVC), Linear Regression (LR), Perceptron, Stochastic Gradient Descent (SGD), Passive Aggressive (PA), Complement Naive Bayes (CNB), MultiLayer Perceptron (MLP), and RidgeClassifier. In the evaluation phase, our system gives F1 scores of 14.87{\%} and 21.49{\%}, for country-level MSA and DA identification respectively, which is very close to the average F1 scores achieved by the submitted systems and recorded for both subtasks (18.70{\%} and 24.23{\%}).",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lichouri-etal-2021-arabic">
<titleInfo>
<title>Arabic Dialect Identification based on a Weighted Concatenation of TF-IDF Features</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mohamed</namePart>
<namePart type="family">Lichouri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mourad</namePart>
<namePart type="family">Abbas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khaled</namePart>
<namePart type="family">Lounnas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Besma</namePart>
<namePart type="family">Benaziz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aicha</namePart>
<namePart type="family">Zitouni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-apr</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth Arabic Natural Language Processing Workshop</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Kyiv, Ukraine (Virtual)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we analyze the impact of the weighted concatenation of TF-IDF features for the Arabic Dialect Identification task while we participated in the NADI2021 shared task. This study is performed for two subtasks: subtask 1.1 (country-level MSA) and subtask 1.2 (country-level DA) identification. The classifiers supporting our comparative study are Linear Support Vector Classification (LSVC), Linear Regression (LR), Perceptron, Stochastic Gradient Descent (SGD), Passive Aggressive (PA), Complement Naive Bayes (CNB), MultiLayer Perceptron (MLP), and RidgeClassifier. In the evaluation phase, our system gives F1 scores of 14.87% and 21.49%, for country-level MSA and DA identification respectively, which is very close to the average F1 scores achieved by the submitted systems and recorded for both subtasks (18.70% and 24.23%).</abstract>
<identifier type="citekey">lichouri-etal-2021-arabic</identifier>
<location>
<url>https://aclanthology.org/2021.wanlp-1.33</url>
</location>
<part>
<date>2021-apr</date>
<extent unit="page">
<start>282</start>
<end>286</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Arabic Dialect Identification based on a Weighted Concatenation of TF-IDF Features
%A Lichouri, Mohamed
%A Abbas, Mourad
%A Lounnas, Khaled
%A Benaziz, Besma
%A Zitouni, Aicha
%S Proceedings of the Sixth Arabic Natural Language Processing Workshop
%D 2021
%8 apr
%I Association for Computational Linguistics
%C Kyiv, Ukraine (Virtual)
%F lichouri-etal-2021-arabic
%X In this paper, we analyze the impact of the weighted concatenation of TF-IDF features for the Arabic Dialect Identification task while we participated in the NADI2021 shared task. This study is performed for two subtasks: subtask 1.1 (country-level MSA) and subtask 1.2 (country-level DA) identification. The classifiers supporting our comparative study are Linear Support Vector Classification (LSVC), Linear Regression (LR), Perceptron, Stochastic Gradient Descent (SGD), Passive Aggressive (PA), Complement Naive Bayes (CNB), MultiLayer Perceptron (MLP), and RidgeClassifier. In the evaluation phase, our system gives F1 scores of 14.87% and 21.49%, for country-level MSA and DA identification respectively, which is very close to the average F1 scores achieved by the submitted systems and recorded for both subtasks (18.70% and 24.23%).
%U https://aclanthology.org/2021.wanlp-1.33
%P 282-286
Markdown (Informal)
[Arabic Dialect Identification based on a Weighted Concatenation of TF-IDF Features](https://aclanthology.org/2021.wanlp-1.33) (Lichouri et al., WANLP 2021)
ACL