@inproceedings{gurrutxaga-alegria-2012-measuring,
title = "Measuring the compositionality of {NV} expressions in {B}asque by means of distributional similarity techniques",
author = "Gurrutxaga, Antton and
Alegria, I{\~n}aki",
booktitle = "Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC}'12)",
month = may,
year = "2012",
address = "Istanbul, Turkey",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2012/pdf/514_Paper.pdf",
pages = "2389--2394",
abstract = "We present several experiments aiming at measuring the semantic compositionality of NV expressions in Basque. Our approach is based on the hypothesis that compositionality can be related to distributional similarity. The contexts of each NV expression are compared with the contexts of its corresponding components, by means of different techniques, as similarity measures usually used with the Vector Space Model (VSM), Latent Semantic Analysis (LSA) and some measures implemented in the Lemur Toolkit, as Indri index, tf-idf, Okapi index and Kullback-Leibler divergence. Using our previous work with cooccurrence techniques as a baseline, the results point to improvements using the Indri index or Kullback-Leibler divergence, and a slight further improvement when used in combination with cooccurrence measures such as {\$}t{\$}-score, via rank-aggregation. This work is part of a project for MWE extraction and characterization using different techniques aiming at measuring the properties related to idiomaticity, as institutionalization, non-compositionality and lexico-syntactic fixedness.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gurrutxaga-alegria-2012-measuring">
<titleInfo>
<title>Measuring the compositionality of NV expressions in Basque by means of distributional similarity techniques</title>
</titleInfo>
<name type="personal">
<namePart type="given">Antton</namePart>
<namePart type="family">Gurrutxaga</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iñaki</namePart>
<namePart type="family">Alegria</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2012-may</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Istanbul, Turkey</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present several experiments aiming at measuring the semantic compositionality of NV expressions in Basque. Our approach is based on the hypothesis that compositionality can be related to distributional similarity. The contexts of each NV expression are compared with the contexts of its corresponding components, by means of different techniques, as similarity measures usually used with the Vector Space Model (VSM), Latent Semantic Analysis (LSA) and some measures implemented in the Lemur Toolkit, as Indri index, tf-idf, Okapi index and Kullback-Leibler divergence. Using our previous work with cooccurrence techniques as a baseline, the results point to improvements using the Indri index or Kullback-Leibler divergence, and a slight further improvement when used in combination with cooccurrence measures such as $t$-score, via rank-aggregation. This work is part of a project for MWE extraction and characterization using different techniques aiming at measuring the properties related to idiomaticity, as institutionalization, non-compositionality and lexico-syntactic fixedness.</abstract>
<identifier type="citekey">gurrutxaga-alegria-2012-measuring</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/514_Paper.pdf</url>
</location>
<part>
<date>2012-may</date>
<extent unit="page">
<start>2389</start>
<end>2394</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Measuring the compositionality of NV expressions in Basque by means of distributional similarity techniques
%A Gurrutxaga, Antton
%A Alegria, Iñaki
%S Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)
%D 2012
%8 may
%I European Language Resources Association (ELRA)
%C Istanbul, Turkey
%F gurrutxaga-alegria-2012-measuring
%X We present several experiments aiming at measuring the semantic compositionality of NV expressions in Basque. Our approach is based on the hypothesis that compositionality can be related to distributional similarity. The contexts of each NV expression are compared with the contexts of its corresponding components, by means of different techniques, as similarity measures usually used with the Vector Space Model (VSM), Latent Semantic Analysis (LSA) and some measures implemented in the Lemur Toolkit, as Indri index, tf-idf, Okapi index and Kullback-Leibler divergence. Using our previous work with cooccurrence techniques as a baseline, the results point to improvements using the Indri index or Kullback-Leibler divergence, and a slight further improvement when used in combination with cooccurrence measures such as $t$-score, via rank-aggregation. This work is part of a project for MWE extraction and characterization using different techniques aiming at measuring the properties related to idiomaticity, as institutionalization, non-compositionality and lexico-syntactic fixedness.
%U http://www.lrec-conf.org/proceedings/lrec2012/pdf/514_Paper.pdf
%P 2389-2394
Markdown (Informal)
[Measuring the compositionality of NV expressions in Basque by means of distributional similarity techniques](http://www.lrec-conf.org/proceedings/lrec2012/pdf/514_Paper.pdf) (Gurrutxaga & Alegria, LREC 2012)
ACL