@inproceedings{smith-etal-2012-good,
title = "A good space: Lexical predictors in word space evaluation",
author = {Smith, Christian and
Danielsson, Henrik and
J{\"o}nsson, Arne},
booktitle = "Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC}'12)",
month = may,
year = "2012",
address = "Istanbul, Turkey",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2012/pdf/335_Paper.pdf",
pages = "2530--2535",
abstract = "Vector space models benefit from using an outside corpus to train the model. It is, however, unclear what constitutes a good training corpus. We have investigated the effect on summary quality when using various language resources to train a vector space based extraction summarizer. This is done by evaluating the performance of the summarizer utilizing vector spaces built from corpora from different genres, partitioned from the Swedish SUC-corpus. The corpora are also characterized using a variety of lexical measures commonly used in readability studies. The performance of the summarizer is measured by comparing automatically produced summaries to human created gold standard summaries using the ROUGE F-score. Our results show that the genre of the training corpus does not have a significant effect on summary quality. However, evaluating the variance in the F-score between the genres based on lexical measures as independent variables in a linear regression model, shows that vector spaces created from texts with high syntactic complexity, high word variation, short sentences and few long words produce better summaries.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="smith-etal-2012-good">
<titleInfo>
<title>A good space: Lexical predictors in word space evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christian</namePart>
<namePart type="family">Smith</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Henrik</namePart>
<namePart type="family">Danielsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arne</namePart>
<namePart type="family">Jönsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2012-may</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Istanbul, Turkey</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Vector space models benefit from using an outside corpus to train the model. It is, however, unclear what constitutes a good training corpus. We have investigated the effect on summary quality when using various language resources to train a vector space based extraction summarizer. This is done by evaluating the performance of the summarizer utilizing vector spaces built from corpora from different genres, partitioned from the Swedish SUC-corpus. The corpora are also characterized using a variety of lexical measures commonly used in readability studies. The performance of the summarizer is measured by comparing automatically produced summaries to human created gold standard summaries using the ROUGE F-score. Our results show that the genre of the training corpus does not have a significant effect on summary quality. However, evaluating the variance in the F-score between the genres based on lexical measures as independent variables in a linear regression model, shows that vector spaces created from texts with high syntactic complexity, high word variation, short sentences and few long words produce better summaries.</abstract>
<identifier type="citekey">smith-etal-2012-good</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/335_Paper.pdf</url>
</location>
<part>
<date>2012-may</date>
<extent unit="page">
<start>2530</start>
<end>2535</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A good space: Lexical predictors in word space evaluation
%A Smith, Christian
%A Danielsson, Henrik
%A Jönsson, Arne
%S Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)
%D 2012
%8 may
%I European Language Resources Association (ELRA)
%C Istanbul, Turkey
%F smith-etal-2012-good
%X Vector space models benefit from using an outside corpus to train the model. It is, however, unclear what constitutes a good training corpus. We have investigated the effect on summary quality when using various language resources to train a vector space based extraction summarizer. This is done by evaluating the performance of the summarizer utilizing vector spaces built from corpora from different genres, partitioned from the Swedish SUC-corpus. The corpora are also characterized using a variety of lexical measures commonly used in readability studies. The performance of the summarizer is measured by comparing automatically produced summaries to human created gold standard summaries using the ROUGE F-score. Our results show that the genre of the training corpus does not have a significant effect on summary quality. However, evaluating the variance in the F-score between the genres based on lexical measures as independent variables in a linear regression model, shows that vector spaces created from texts with high syntactic complexity, high word variation, short sentences and few long words produce better summaries.
%U http://www.lrec-conf.org/proceedings/lrec2012/pdf/335_Paper.pdf
%P 2530-2535
Markdown (Informal)
[A good space: Lexical predictors in word space evaluation](http://www.lrec-conf.org/proceedings/lrec2012/pdf/335_Paper.pdf) (Smith et al., LREC 2012)
ACL
- Christian Smith, Henrik Danielsson, and Arne Jönsson. 2012. A good space: Lexical predictors in word space evaluation. In Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12), pages 2530–2535, Istanbul, Turkey. European Language Resources Association (ELRA).