@inproceedings{sanders-2012-collecting,
title = "Collecting and Analysing Chats and Tweets in {S}o{N}a{R}",
author = "Sanders, Eric",
booktitle = "Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC}'12)",
month = may,
year = "2012",
address = "Istanbul, Turkey",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2012/pdf/416_Paper.pdf",
pages = "2253--2256",
abstract = "In this paper a collection of chats and tweets from the Netherlands and Flanders is described. The chats and tweets are part of the freely available SoNaR corpus, a 500 million word text corpus of the Dutch language. Recruitment, metadata, anonymisation and IPR issues are discussed. To illustrate the difference of language use between the various text types and other parameters (like gender and age) simple text analysis in the form of unigram frequency lists is carried out. Furthermore a website is presented with which users can retrieve their own frequency lists.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sanders-2012-collecting">
<titleInfo>
<title>Collecting and Analysing Chats and Tweets in SoNaR</title>
</titleInfo>
<name type="personal">
<namePart type="given">Eric</namePart>
<namePart type="family">Sanders</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2012-may</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Istanbul, Turkey</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper a collection of chats and tweets from the Netherlands and Flanders is described. The chats and tweets are part of the freely available SoNaR corpus, a 500 million word text corpus of the Dutch language. Recruitment, metadata, anonymisation and IPR issues are discussed. To illustrate the difference of language use between the various text types and other parameters (like gender and age) simple text analysis in the form of unigram frequency lists is carried out. Furthermore a website is presented with which users can retrieve their own frequency lists.</abstract>
<identifier type="citekey">sanders-2012-collecting</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/416_Paper.pdf</url>
</location>
<part>
<date>2012-may</date>
<extent unit="page">
<start>2253</start>
<end>2256</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Collecting and Analysing Chats and Tweets in SoNaR
%A Sanders, Eric
%S Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)
%D 2012
%8 may
%I European Language Resources Association (ELRA)
%C Istanbul, Turkey
%F sanders-2012-collecting
%X In this paper a collection of chats and tweets from the Netherlands and Flanders is described. The chats and tweets are part of the freely available SoNaR corpus, a 500 million word text corpus of the Dutch language. Recruitment, metadata, anonymisation and IPR issues are discussed. To illustrate the difference of language use between the various text types and other parameters (like gender and age) simple text analysis in the form of unigram frequency lists is carried out. Furthermore a website is presented with which users can retrieve their own frequency lists.
%U http://www.lrec-conf.org/proceedings/lrec2012/pdf/416_Paper.pdf
%P 2253-2256
Markdown (Informal)
[Collecting and Analysing Chats and Tweets in SoNaR](http://www.lrec-conf.org/proceedings/lrec2012/pdf/416_Paper.pdf) (Sanders, LREC 2012)
ACL
- Eric Sanders. 2012. Collecting and Analysing Chats and Tweets in SoNaR. In Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12), pages 2253–2256, Istanbul, Turkey. European Language Resources Association (ELRA).