% Conference-paper record (citation key: vernetti-freitas-2020-assessment).
% Text outside an @entry is ignored by BibTeX, so these two lines act as comments.
@inproceedings{vernetti-freitas-2020-assessment,
    title     = {An Assessment of Language Identification Methods on Tweets and {Wikipedia} Articles},
    author    = {Vernetti, Pedro and Freitas, Larissa},
    booktitle = {Proceedings of the Fourth Widening Natural Language Processing Workshop},
    month     = jul,
    year      = {2020},
    address   = {Seattle, USA},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2020.winlp-1.15},
    doi       = {10.18653/v1/2020.winlp-1.15},
    pages     = {58--60},
    abstract  = {Language identification is the task of determining the language which a given text is written. This task is important for Natural Language Processing and Information Retrieval activities. Two popular approaches for language identification are the N-grams and stopwords models. In this paper, these two models were tested on different types of documents such as short, irregular texts (tweets) and long, regular texts (Wikipedia articles).},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for the paper with citekey vernetti-freitas-2020-assessment.
     The relatedItem of type "host" describes the proceedings volume that contains it. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="vernetti-freitas-2020-assessment">
    <titleInfo>
      <title>An Assessment of Language Identification Methods on Tweets and Wikipedia Articles</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Pedro</namePart>
      <namePart type="family">Vernetti</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Larissa</namePart>
      <namePart type="family">Freitas</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2020-jul</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings in which the paper appears. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Fourth Widening Natural Language Processing Workshop</title>
      </titleInfo>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Seattle, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Language identification is the task of determining the language which a given text is written. This task is important for Natural Language Processing and Information Retrieval activities. Two popular approaches for language identification are the N-grams and stopwords models. In this paper, these two models were tested on different types of documents such as short, irregular texts (tweets) and long, regular texts (Wikipedia articles).</abstract>
    <identifier type="citekey">vernetti-freitas-2020-assessment</identifier>
    <identifier type="doi">10.18653/v1/2020.winlp-1.15</identifier>
    <location>
      <url>https://aclanthology.org/2020.winlp-1.15</url>
    </location>
    <part>
      <date>2020-jul</date>
      <extent unit="page">
        <start>58</start>
        <end>60</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T An Assessment of Language Identification Methods on Tweets and Wikipedia Articles
%A Vernetti, Pedro
%A Freitas, Larissa
%S Proceedings of the Fourth Widening Natural Language Processing Workshop
%D 2020
%8 jul
%I Association for Computational Linguistics
%C Seattle, USA
%F vernetti-freitas-2020-assessment
%X Language identification is the task of determining the language which a given text is written. This task is important for Natural Language Processing and Information Retrieval activities. Two popular approaches for language identification are the N-grams and stopwords models. In this paper, these two models were tested on different types of documents such as short, irregular texts (tweets) and long, regular texts (Wikipedia articles).
%R 10.18653/v1/2020.winlp-1.15
%U https://aclanthology.org/2020.winlp-1.15
%U https://doi.org/10.18653/v1/2020.winlp-1.15
%P 58-60
Markdown (Informal)
[An Assessment of Language Identification Methods on Tweets and Wikipedia Articles](https://aclanthology.org/2020.winlp-1.15) (Vernetti & Freitas, WiNLP 2020)
ACL