% LREC 2014 conference paper; ACL Anthology ID L14-1361.
@inproceedings{fromreide-etal-2014-crowdsourcing,
    title = "Crowdsourcing and annotating {NER} for {Twitter} {\#}drift",
    author = "Fromreide, Hege and
      Hovy, Dirk and
      S{\o}gaard, Anders",
    editor = "Calzolari, Nicoletta and
      Choukri, Khalid and
      Declerck, Thierry and
      Loftsson, Hrafn and
      Maegaard, Bente and
      Mariani, Joseph and
      Moreno, Asuncion and
      Odijk, Jan and
      Piperidis, Stelios",
    booktitle = "Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)",
    month = may,
    year = "2014",
    address = "Reykjavik, Iceland",
    publisher = "European Language Resources Association (ELRA)",
    url = "https://aclanthology.org/L14-1361/",
    pages = "2544--2547",
    abstract = "We present two new NER datasets for Twitter; a manually annotated set of 1,467 tweets (kappa=0.942) and a set of 2,975 expert-corrected, crowdsourced NER annotated tweets from the dataset described in Finin et al. (2010). In our experiments with these datasets, we observe two important points: (a) language drift on Twitter is significant, and while off-the-shelf systems have been reported to perform well on in-sample data, they often perform poorly on new samples of tweets, (b) state-of-the-art performance across various datasets can be obtained from crowdsourced annotations, making it more feasible to {\textquotedblleft}catch up{\textquotedblright} with language drift."
}
Markdown (Informal)
[Crowdsourcing and annotating NER for Twitter #drift](https://aclanthology.org/L14-1361/) (Fromreide et al., LREC 2014)
ACL
- Hege Fromreide, Dirk Hovy, and Anders Søgaard. 2014. Crowdsourcing and annotating NER for Twitter #drift. In Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14), pages 2544–2547, Reykjavik, Iceland. European Language Resources Association (ELRA).