@comment{
  touileb-2022-nerdz: short paper in the AACL-IJCNLP 2022 proceedings.
  The url field uses the canonical ACL Anthology address (same record id as
  the DOI suffix) rather than a preview-branch link. The abstract is kept
  verbatim as exported.
}
@inproceedings{touileb-2022-nerdz,
  title     = {{NERDz}: A Preliminary Dataset of Named Entities for {Algerian}},
  author    = {Touileb, Samia},
  editor    = {He, Yulan and
               Ji, Heng and
               Li, Sujian and
               Liu, Yang and
               Chang, Chua-Hui},
  booktitle = {Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)},
  month     = nov,
  year      = {2022},
  address   = {Online only},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.aacl-short.13/},
  doi       = {10.18653/v1/2022.aacl-short.13},
  pages     = {95--101},
  abstract  = {This paper introduces a first step towards creating the NERDz dataset. A manually annotated dataset of named entities for the Algerian vernacular dialect. The annotations are built on top of a recent extension to the Algerian NArabizi Treebank, comprizing NArabizi sentences with manual transliterations into Arabic and code-switched scripts. NERDz is therefore not only the first dataset of named entities for Algerian, but it also comprises parallel entities written in Latin, Arabic, and code-switched scripts. We present a detailed overview of our annotations, inter-annotator agreement measures, and define two preliminary baselines using a neural sequence labeling approach and an Algerian BERT model. We also make the annotation guidelines and the annotations available for future work},
}
Markdown (Informal)
[NERDz: A Preliminary Dataset of Named Entities for Algerian](https://aclanthology.org/2022.aacl-short.13/) (Touileb, AACL-IJCNLP 2022)
ACL
- Samia Touileb. 2022. NERDz: A Preliminary Dataset of Named Entities for Algerian. In Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 2: Short Papers), pages 95–101, Online only. Association for Computational Linguistics.