@comment{
  Adouane, Semmar and Johansson (2016): workshop paper in the VarDial3
  proceedings (COLING 2016, Osaka) on automatic identification of Romanized
  Berber and Romanized Arabic. The url field uses the permanent ACL Anthology
  address for paper ID W16-4807 rather than a temporary preview-branch link.
}
@inproceedings{adouane-etal-2016-romanized,
    title     = {{Romanized} {Berber} and {Romanized} {Arabic} Automatic Language Identification Using Machine Learning},
    author    = {Adouane, Wafia and
                 Semmar, Nasredine and
                 Johansson, Richard},
    editor    = {Nakov, Preslav and
                 Zampieri, Marcos and
                 Tan, Liling and
                 Ljube{\v{s}}i{\'c}, Nikola and
                 Tiedemann, J{\"o}rg and
                 Malmasi, Shervin},
    booktitle = {Proceedings of the Third Workshop on {NLP} for Similar Languages, Varieties and Dialects ({VarDial3})},
    month     = dec,
    year      = {2016},
    address   = {Osaka, Japan},
    publisher = {The COLING 2016 Organizing Committee},
    url       = {https://aclanthology.org/W16-4807/},
    pages     = {53--61},
    abstract  = {The identification of the language of text/speech input is the first step to be able to properly do any language-dependent natural language processing. The task is called Automatic Language Identification (ALI). Being a well-studied field since early 1960's, various methods have been applied to many standard languages. The ALI standard methods require datasets for training and use character/word-based n-gram models. However, social media and new technologies have contributed to the rise of informal and minority languages on the Web. The state-of-the-art automatic language identifiers fail to properly identify many of them. Romanized Arabic (RA) and Romanized Berber (RB) are cases of these informal languages which are under-resourced. The goal of this paper is twofold: detect RA and RB, at a document level, as separate languages and distinguish between them as they coexist in North Africa. We consider the task as a classification problem and use supervised machine learning to solve it. For both languages, character-based 5-grams combined with additional lexicons score the best, F-score of 99.75{\%} and 97.77{\%} for RB and RA respectively.},
}
Markdown (Informal)
[Romanized Berber and Romanized Arabic Automatic Language Identification Using Machine Learning](https://preview.aclanthology.org/add-emnlp-2024-awards/W16-4807/) (Adouane et al., VarDial 2016)
ACL