% Conference paper in the LREC'14 proceedings (Soler Company and Wanner, 2014).
% Author names use the "Last, First" form so the two-word surname
% "Soler Company" is not split by the name parser.
@inproceedings{soler-company-wanner-2014-use,
  author    = {Soler Company, Juan and Wanner, Leo},
  title     = {How to Use less Features and Reach Better Performance in Author Gender Identification},
  booktitle = {Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)},
  month     = may,
  year      = {2014},
  address   = {Reykjavik, Iceland},
  publisher = {European Language Resources Association (ELRA)},
  pages     = {1315--1319},
  url       = {http://www.lrec-conf.org/proceedings/lrec2014/pdf/104_Paper.pdf},
  abstract  = {Over the last years, author profiling in general and author gender identification in particular have become a popular research area due to their potential attractive applications that range from forensic investigations to online marketing studies. However, nearly all state-of-the-art works in the area still very much depend on the datasets they were trained and tested on, since they heavily draw on content features, mostly a large number of recurrent words or combinations of words extracted from the training sets. We show that using a small number of features that mainly depend on the structure of the texts we can outperform other approaches that depend mainly on the content of the texts and that use a huge number of features in the process of identifying if the author of a text is a man or a woman. Our system has been tested against a dataset constructed for our work as well as against two datasets that were previously used in other papers.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, citekey soler-company-wanner-2014-use. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="soler-company-wanner-2014-use">
    <titleInfo>
      <title>How to Use less Features and Reach Better Performance in Author Gender Identification</title>
    </titleInfo>
    <!-- Authors, in publication order. -->
    <name type="personal">
      <namePart type="given">Juan</namePart>
      <namePart type="family">Soler Company</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Leo</namePart>
      <namePart type="family">Wanner</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2014-may</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume this paper appears in. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)</title>
      </titleInfo>
      <originInfo>
        <publisher>European Language Resources Association (ELRA)</publisher>
        <place>
          <placeTerm type="text">Reykjavik, Iceland</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Over the last years, author profiling in general and author gender identification in particular have become a popular research area due to their potential attractive applications that range from forensic investigations to online marketing studies. However, nearly all state-of-the-art works in the area still very much depend on the datasets they were trained and tested on, since they heavily draw on content features, mostly a large number of recurrent words or combinations of words extracted from the training sets. We show that using a small number of features that mainly depend on the structure of the texts we can outperform other approaches that depend mainly on the content of the texts and that use a huge number of features in the process of identifying if the author of a text is a man or a woman. Our system has been tested against a dataset constructed for our work as well as against two datasets that were previously used in other papers.</abstract>
    <identifier type="citekey">soler-company-wanner-2014-use</identifier>
    <location>
      <url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/104_Paper.pdf</url>
    </location>
    <!-- Page range of the paper. -->
    <part>
      <date>2014-may</date>
      <extent unit="page">
        <start>1315</start>
        <end>1319</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T How to Use less Features and Reach Better Performance in Author Gender Identification
%A Soler Company, Juan
%A Wanner, Leo
%S Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)
%D 2014
%8 may
%I European Language Resources Association (ELRA)
%C Reykjavik, Iceland
%F soler-company-wanner-2014-use
%X Over the last years, author profiling in general and author gender identification in particular have become a popular research area due to their potential attractive applications that range from forensic investigations to online marketing studies. However, nearly all state-of-the-art works in the area still very much depend on the datasets they were trained and tested on, since they heavily draw on content features, mostly a large number of recurrent words or combinations of words extracted from the training sets. We show that using a small number of features that mainly depend on the structure of the texts we can outperform other approaches that depend mainly on the content of the texts and that use a huge number of features in the process of identifying if the author of a text is a man or a woman. Our system has been tested against a dataset constructed for our work as well as against two datasets that were previously used in other papers.
%U http://www.lrec-conf.org/proceedings/lrec2014/pdf/104_Paper.pdf
%P 1315-1319
Markdown (Informal)
[How to Use less Features and Reach Better Performance in Author Gender Identification](http://www.lrec-conf.org/proceedings/lrec2014/pdf/104_Paper.pdf) (Soler Company & Wanner, LREC 2014)
ACL