% Workshop paper; ACL Anthology ID W17-2901 (see url/doi below).
% NOTE(review): `address` looks like the conference location rather than the publisher's city -- confirm before relying on it.
@inproceedings{ljubesic-etal-2017-language,
  author    = {Ljube{\v{s}}i{\'c}, Nikola and
               Fi{\v{s}}er, Darja and
               Erjavec, Toma{\v{z}}},
  title     = {Language-independent Gender Prediction on {Twitter}},
  booktitle = {Proceedings of the Second Workshop on {NLP} and Computational Social Science},
  month     = aug,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {1--6},
  doi       = {10.18653/v1/W17-2901},
  url       = {https://aclanthology.org/W17-2901},
  abstract  = {In this paper we present a set of experiments and analyses on predicting the gender of Twitter users based on language-independent features extracted either from the text or the metadata of users' tweets. We perform our experiments on the TwiSty dataset containing manual gender annotations for users speaking six different languages. Our classification results show that, while the prediction model based on language-independent features performs worse than the bag-of-words model when training and testing on the same language, it regularly outperforms the bag-of-words model when applied to different languages, showing very stable results across various languages. Finally we perform a comparative analysis of feature effect sizes across the six languages and show that differences in our features correspond to cultural distances.},
}
Markdown (Informal)
[Language-independent Gender Prediction on Twitter](https://aclanthology.org/W17-2901) (Ljubešić et al., NLP+CSS 2017)
ACL
- Nikola Ljubešić, Darja Fišer, and Tomaž Erjavec. 2017. Language-independent Gender Prediction on Twitter. In Proceedings of the Second Workshop on NLP and Computational Social Science, pages 1–6, Vancouver, Canada. Association for Computational Linguistics.