@comment{Goutte and Leger, VarDial 2016 workshop paper (ACL Anthology ID W16-4823). The url field holds the canonical Anthology address, not a preview-branch build.}
@inproceedings{goutte-leger-2016-advances,
  title     = {Advances in Ngram-based Discrimination of Similar Languages},
  author    = {Goutte, Cyril and
               L{\'e}ger, Serge},
  editor    = {Nakov, Preslav and
               Zampieri, Marcos and
               Tan, Liling and
               Ljube{\v{s}}i{\'c}, Nikola and
               Tiedemann, J{\"o}rg and
               Malmasi, Shervin},
  booktitle = {Proceedings of the Third Workshop on {NLP} for Similar Languages, Varieties and Dialects ({V}ar{D}ial3)},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  url       = {https://aclanthology.org/W16-4823/},
  pages     = {178--184},
  abstract  = {We describe the systems entered by the National Research Council in the 2016 shared task on discriminating similar languages. Like previous years, we relied on character ngram features, and a mixture of discriminative and generative statistical classifiers. We mostly investigated the influence of the amount of data on the performance, in the open task, and compared the two-stage approach (predicting language/group, then variant) to a flat approach. Results suggest that ngrams are still state-of-the-art for language and variant identification, and that additional data has a small but decisive impact.},
}
Markdown (Informal)
[Advances in Ngram-based Discrimination of Similar Languages](https://aclanthology.org/W16-4823/) (Goutte & Léger, VarDial 2016)
ACL