@inproceedings{diab-etal-2016-creating,
title = "Creating a Large Multi-Layered Representational Repository of Linguistic Code Switched {A}rabic Data",
author = "Diab, Mona and
Ghoneim, Mahmoud and
Hawwari, Abdelati and
AlGhamdi, Fahad and
AlMarwani, Nada and
Al-Badrashiny, Mohamed",
booktitle = "Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)",
month = may,
year = "2016",
address = "Portoro{\v{z}}, Slovenia",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/L16-1669",
pages = "4228--4235",
abstract = "We present our effort to create a large Multi-Layered representational repository of Linguistic Code-Switched Arabic data. The process involves developing clear annotation standards and Guidelines, streamlining the annotation process, and implementing quality control measures. We used two main protocols for annotation: in-lab gold annotations and crowd sourcing annotations. We developed a web-based annotation tool to facilitate the management of the annotation process. The current version of the repository contains a total of 886,252 tokens that are tagged into one of sixteen code-switching tags. The data exhibits code switching between Modern Standard Arabic and Egyptian Dialectal Arabic representing three data genres: Tweets, commentaries, and discussion fora. The overall Inter-Annotator Agreement is 93.1{\%}.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="diab-etal-2016-creating">
<titleInfo>
<title>Creating a Large Multi-Layered Representational Repository of Linguistic Code Switched Arabic Data</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mona</namePart>
<namePart type="family">Diab</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mahmoud</namePart>
<namePart type="family">Ghoneim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abdelati</namePart>
<namePart type="family">Hawwari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fahad</namePart>
<namePart type="family">AlGhamdi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nada</namePart>
<namePart type="family">AlMarwani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohamed</namePart>
<namePart type="family">Al-Badrashiny</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-may</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Portorož, Slovenia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present our effort to create a large Multi-Layered representational repository of Linguistic Code-Switched Arabic data. The process involves developing clear annotation standards and Guidelines, streamlining the annotation process, and implementing quality control measures. We used two main protocols for annotation: in-lab gold annotations and crowd sourcing annotations. We developed a web-based annotation tool to facilitate the management of the annotation process. The current version of the repository contains a total of 886,252 tokens that are tagged into one of sixteen code-switching tags. The data exhibits code switching between Modern Standard Arabic and Egyptian Dialectal Arabic representing three data genres: Tweets, commentaries, and discussion fora. The overall Inter-Annotator Agreement is 93.1%.</abstract>
<identifier type="citekey">diab-etal-2016-creating</identifier>
<location>
<url>https://aclanthology.org/L16-1669</url>
</location>
<part>
<date>2016-may</date>
<extent unit="page">
<start>4228</start>
<end>4235</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Creating a Large Multi-Layered Representational Repository of Linguistic Code Switched Arabic Data
%A Diab, Mona
%A Ghoneim, Mahmoud
%A Hawwari, Abdelati
%A AlGhamdi, Fahad
%A AlMarwani, Nada
%A Al-Badrashiny, Mohamed
%S Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)
%D 2016
%8 may
%I European Language Resources Association (ELRA)
%C Portorož, Slovenia
%F diab-etal-2016-creating
%X We present our effort to create a large Multi-Layered representational repository of Linguistic Code-Switched Arabic data. The process involves developing clear annotation standards and Guidelines, streamlining the annotation process, and implementing quality control measures. We used two main protocols for annotation: in-lab gold annotations and crowd sourcing annotations. We developed a web-based annotation tool to facilitate the management of the annotation process. The current version of the repository contains a total of 886,252 tokens that are tagged into one of sixteen code-switching tags. The data exhibits code switching between Modern Standard Arabic and Egyptian Dialectal Arabic representing three data genres: Tweets, commentaries, and discussion fora. The overall Inter-Annotator Agreement is 93.1%.
%U https://aclanthology.org/L16-1669
%P 4228-4235
Markdown (Informal)
[Creating a Large Multi-Layered Representational Repository of Linguistic Code Switched Arabic Data](https://aclanthology.org/L16-1669) (Diab et al., LREC 2016)
ACL