% ACL Anthology record W17-1418: paper in the 6th Workshop on Balto-Slavic NLP (2017).
% The url field uses the stable aclanthology.org address, not a branch-preview host.
@inproceedings{verhoeven-etal-2017-gender,
  title     = {Gender Profiling for {Slovene} {Twitter} communication: the Influence of Gender Marking, Content and Style},
  author    = {Verhoeven, Ben and
               {\v{S}}krjanec, Iza and
               Pollak, Senja},
  editor    = {Erjavec, Toma{\v{z}} and
               Piskorski, Jakub and
               Pivovarova, Lidia and
               {\v{S}}najder, Jan and
               Steinberger, Josef and
               Yangarber, Roman},
  booktitle = {Proceedings of the 6th Workshop on {Balto-Slavic} Natural Language Processing},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/W17-1418/},
  doi       = {10.18653/v1/W17-1418},
  pages     = {119--125},
  abstract  = {We present results of the first gender classification experiments on Slovene text to our knowledge. Inspired by the TwiSty corpus and experiments (Verhoeven et al., 2016), we employed the Janes corpus (Erjavec et al., 2016) and its gender annotations to perform gender classification experiments on Twitter text comparing a token-based and a lemma-based approach. We find that the token-based approach (92.6{\%} accuracy), containing gender markings related to the author, outperforms the lemma-based approach by about 5{\%}. Especially in the lemmatized version, we also observe stylistic and content-based differences in writing between men (e.g. more profane language, numerals and beer mentions) and women (e.g. more pronouns, emoticons and character flooding). Many of our findings corroborate previous research on other languages.},
}
Markdown (Informal)
[Gender Profiling for Slovene Twitter communication: the Influence of Gender Marking, Content and Style](https://aclanthology.org/W17-1418/) (Verhoeven et al., BSNLP 2017)
ACL