% Barbaresi (2016): workshop paper in the VarDial3 proceedings (pages 212--220).
% The url field uses the canonical ACL Anthology address for ID W16-4827
% instead of a temporary preview-branch mirror, which is not a stable link.
@inproceedings{barbaresi-2016-unsupervised,
  title     = {An Unsupervised Morphological Criterion for Discriminating Similar Languages},
  author    = {Barbaresi, Adrien},
  editor    = {Nakov, Preslav and
               Zampieri, Marcos and
               Tan, Liling and
               Ljube{\v{s}}i{\'c}, Nikola and
               Tiedemann, J{\"o}rg and
               Malmasi, Shervin},
  booktitle = {Proceedings of the Third Workshop on {NLP} for Similar Languages, Varieties and Dialects ({VarDial3})},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  url       = {https://aclanthology.org/W16-4827/},
  pages     = {212--220},
  abstract  = {In this study conducted on the occasion of the Discriminating between Similar Languages shared task, I introduce an additional decision factor focusing on the token and subtoken level. The motivation behind this submission is to test whether a morphologically-informed criterion can add linguistically relevant information to global categorization and thus improve performance. The contributions of this paper are (1) a description of the unsupervised, low-resource method; (2) an evaluation and analysis of its raw performance; and (3) an assessment of its impact within a model comprising common indicators used in language identification. I present and discuss the systems used in the task A, a 12-way language identification task comprising varieties of five main language groups. Additionally I introduce a new off-the-shelf Naive Bayes classifier using a contrastive word and subword n-gram model ({\textquotedblleft}Bayesline{\textquotedblright}) which outperforms the best submissions.},
}
Markdown (Informal)
[An Unsupervised Morphological Criterion for Discriminating Similar Languages](https://aclanthology.org/W16-4827/) (Barbaresi, VarDial 2016)
ACL