@inproceedings{verhoeven-etal-2017-gender,
title = "Gender Profiling for {S}lovene {T}witter communication: the Influence of Gender Marking, Content and Style",
author = "Verhoeven, Ben and
{\v{S}}krjanec, Iza and
Pollak, Senja",
booktitle = "Proceedings of the 6th Workshop on {B}alto-{S}lavic Natural Language Processing",
month = apr,
year = "2017",
address = "Valencia, Spain",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-1418",
doi = "10.18653/v1/W17-1418",
pages = "119--125",
abstract = "We present results of the first gender classification experiments on Slovene text to our knowledge. Inspired by the TwiSty corpus and experiments (Verhoeven et al., 2016), we employed the Janes corpus (Erjavec et al., 2016) and its gender annotations to perform gender classification experiments on Twitter text comparing a token-based and a lemma-based approach. We find that the token-based approach (92.6{\%} accuracy), containing gender markings related to the author, outperforms the lemma-based approach by about 5{\%}. Especially in the lemmatized version, we also observe stylistic and content-based differences in writing between men (e.g. more profane language, numerals and beer mentions) and women (e.g. more pronouns, emoticons and character flooding). Many of our findings corroborate previous research on other languages.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="verhoeven-etal-2017-gender">
<titleInfo>
<title>Gender Profiling for Slovene Twitter communication: the Influence of Gender Marking, Content and Style</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ben</namePart>
<namePart type="family">Verhoeven</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iza</namePart>
<namePart type="family">Škrjanec</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Senja</namePart>
<namePart type="family">Pollak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 6th Workshop on Balto-Slavic Natural Language Processing</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Valencia, Spain</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present results of the first gender classification experiments on Slovene text to our knowledge. Inspired by the TwiSty corpus and experiments (Verhoeven et al., 2016), we employed the Janes corpus (Erjavec et al., 2016) and its gender annotations to perform gender classification experiments on Twitter text comparing a token-based and a lemma-based approach. We find that the token-based approach (92.6% accuracy), containing gender markings related to the author, outperforms the lemma-based approach by about 5%. Especially in the lemmatized version, we also observe stylistic and content-based differences in writing between men (e.g. more profane language, numerals and beer mentions) and women (e.g. more pronouns, emoticons and character flooding). Many of our findings corroborate previous research on other languages.</abstract>
<identifier type="citekey">verhoeven-etal-2017-gender</identifier>
<identifier type="doi">10.18653/v1/W17-1418</identifier>
<location>
<url>https://aclanthology.org/W17-1418</url>
</location>
<part>
<date>2017-04</date>
<extent unit="page">
<start>119</start>
<end>125</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Gender Profiling for Slovene Twitter communication: the Influence of Gender Marking, Content and Style
%A Verhoeven, Ben
%A Škrjanec, Iza
%A Pollak, Senja
%S Proceedings of the 6th Workshop on Balto-Slavic Natural Language Processing
%D 2017
%8 apr
%I Association for Computational Linguistics
%C Valencia, Spain
%F verhoeven-etal-2017-gender
%X We present results of the first gender classification experiments on Slovene text to our knowledge. Inspired by the TwiSty corpus and experiments (Verhoeven et al., 2016), we employed the Janes corpus (Erjavec et al., 2016) and its gender annotations to perform gender classification experiments on Twitter text comparing a token-based and a lemma-based approach. We find that the token-based approach (92.6% accuracy), containing gender markings related to the author, outperforms the lemma-based approach by about 5%. Especially in the lemmatized version, we also observe stylistic and content-based differences in writing between men (e.g. more profane language, numerals and beer mentions) and women (e.g. more pronouns, emoticons and character flooding). Many of our findings corroborate previous research on other languages.
%R 10.18653/v1/W17-1418
%U https://aclanthology.org/W17-1418
%U https://doi.org/10.18653/v1/W17-1418
%P 119-125
Markdown (Informal)
[Gender Profiling for Slovene Twitter communication: the Influence of Gender Marking, Content and Style](https://aclanthology.org/W17-1418) (Verhoeven et al., 2017)
ACL