% Vernetti & Freitas, WiNLP 2020: N-gram vs. stopword language identification
% on tweets and Wikipedia articles. The url field holds the canonical ACL
% Anthology permalink (same identifier as the DOI suffix), not a preview-branch link.
@inproceedings{vernetti-freitas-2020-assessment,
  title     = {An Assessment of Language Identification Methods on Tweets and {Wikipedia} Articles},
  author    = {Vernetti, Pedro and
               Freitas, Larissa},
  editor    = {Cunha, Rossana and
               Shaikh, Samira and
               Varis, Erika and
               Georgi, Ryan and
               Tsai, Alicia and
               Anastasopoulos, Antonios and
               Chandu, Khyathi Raghavi},
  booktitle = {Proceedings of the Fourth Widening Natural Language Processing Workshop},
  month     = jul,
  year      = {2020},
  address   = {Seattle, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2020.winlp-1.15/},
  doi       = {10.18653/v1/2020.winlp-1.15},
  pages     = {58--60},
  abstract  = {Language identification is the task of determining the language which a given text is written. This task is important for Natural Language Processing and Information Retrieval activities. Two popular approaches for language identification are the N-grams and stopwords models. In this paper, these two models were tested on different types of documents such as short, irregular texts (tweets) and long, regular texts (Wikipedia articles).}
}
Markdown (Informal)
[An Assessment of Language Identification Methods on Tweets and Wikipedia Articles](https://aclanthology.org/2020.winlp-1.15/) (Vernetti & Freitas, WiNLP 2020)
ACL