% Conference paper published in the LREC 2014 proceedings (see booktitle).
@inproceedings{padro-etal-2014-comparing,
  title     = {Comparing Similarity Measures for Distributional Thesauri},
  author    = {Padr{\'o}, Muntsa and
               Idiart, Marco and
               Villavicencio, Aline and
               Ramisch, Carlos},
  booktitle = {Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)},
  month     = may,
  year      = {2014},
  address   = {Reykjavik, Iceland},
  publisher = {European Language Resources Association (ELRA)},
  url       = {http://www.lrec-conf.org/proceedings/lrec2014/pdf/619_Paper.pdf},
  pages     = {2964--2971},
  abstract  = {Distributional thesauri have been applied for a variety of tasks involving semantic relatedness. In this paper, we investigate the impact of three parameters: similarity measures, frequency thresholds and association scores. We focus on the robustness and stability of the resulting thesauri, measuring inter-thesaurus agreement when testing different parameter values. The results obtained show that low-frequency thresholds affect thesaurus quality more than similarity measures, with more agreement found for increasing thresholds. These results indicate the sensitivity of distributional thesauri to frequency. Nonetheless, the observed differences do not transpose over extrinsic evaluation using TOEFL-like questions. While this may be specific to the task, we argue that a careful examination of the stability of distributional resources prior to application is needed.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="padro-etal-2014-comparing">
<titleInfo>
<title>Comparing Similarity Measures for Distributional Thesauri</title>
</titleInfo>
<name type="personal">
<namePart type="given">Muntsa</namePart>
<namePart type="family">Padró</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Idiart</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aline</namePart>
<namePart type="family">Villavicencio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carlos</namePart>
<namePart type="family">Ramisch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2014-may</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Reykjavik, Iceland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Distributional thesauri have been applied for a variety of tasks involving semantic relatedness. In this paper, we investigate the impact of three parameters: similarity measures, frequency thresholds and association scores. We focus on the robustness and stability of the resulting thesauri, measuring inter-thesaurus agreement when testing different parameter values. The results obtained show that low-frequency thresholds affect thesaurus quality more than similarity measures, with more agreement found for increasing thresholds. These results indicate the sensitivity of distributional thesauri to frequency. Nonetheless, the observed differences do not transpose over extrinsic evaluation using TOEFL-like questions. While this may be specific to the task, we argue that a careful examination of the stability of distributional resources prior to application is needed.</abstract>
<identifier type="citekey">padro-etal-2014-comparing</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/619_Paper.pdf</url>
</location>
<part>
<date>2014-may</date>
<extent unit="page">
<start>2964</start>
<end>2971</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Comparing Similarity Measures for Distributional Thesauri
%A Padró, Muntsa
%A Idiart, Marco
%A Villavicencio, Aline
%A Ramisch, Carlos
%S Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)
%D 2014
%8 may
%I European Language Resources Association (ELRA)
%C Reykjavik, Iceland
%F padro-etal-2014-comparing
%X Distributional thesauri have been applied for a variety of tasks involving semantic relatedness. In this paper, we investigate the impact of three parameters: similarity measures, frequency thresholds and association scores. We focus on the robustness and stability of the resulting thesauri, measuring inter-thesaurus agreement when testing different parameter values. The results obtained show that low-frequency thresholds affect thesaurus quality more than similarity measures, with more agreement found for increasing thresholds. These results indicate the sensitivity of distributional thesauri to frequency. Nonetheless, the observed differences do not transpose over extrinsic evaluation using TOEFL-like questions. While this may be specific to the task, we argue that a careful examination of the stability of distributional resources prior to application is needed.
%U http://www.lrec-conf.org/proceedings/lrec2014/pdf/619_Paper.pdf
%P 2964-2971
Markdown (Informal)
[Comparing Similarity Measures for Distributional Thesauri](http://www.lrec-conf.org/proceedings/lrec2014/pdf/619_Paper.pdf) (Padró et al., LREC 2014)
ACL
- Muntsa Padró, Marco Idiart, Aline Villavicencio, and Carlos Ramisch. 2014. Comparing Similarity Measures for Distributional Thesauri. In Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14), pages 2964–2971, Reykjavik, Iceland. European Language Resources Association (ELRA).