@inproceedings{couillet-etal-2020-word,
  author    = {Couillet, Romain and Cinar, Yagmur Gizem and Gaussier, Eric and Imran, Muhammad},
  title     = {Word Representations Concentrate and This is Good News!},
  booktitle = {Proceedings of the 24th Conference on Computational Natural Language Learning},
  month     = nov,
  year      = {2020},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  pages     = {325--334},
  doi       = {10.18653/v1/2020.conll-1.25},
  url       = {https://aclanthology.org/2020.conll-1.25},
  abstract  = {This article establishes that, unlike the legacy tf*idf representation, recent natural language representations (word embedding vectors) tend to exhibit a so-called \textit{concentration of measure phenomenon}, in the sense that, as the representation size $p$ and database size $n$ are both large, their behavior is similar to that of large dimensional Gaussian random vectors. This phenomenon may have important consequences as machine learning algorithms for natural language data could be amenable to improvement, thereby providing new theoretical insights into the field of natural language processing.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey couillet-etal-2020-word: four personal authors,
     a host item describing the proceedings volume, and page extent 325-334. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="couillet-etal-2020-word">
    <titleInfo>
      <title>Word Representations Concentrate and This is Good News!</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Romain</namePart>
      <namePart type="family">Couillet</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yagmur</namePart>
      <namePart type="given">Gizem</namePart>
      <namePart type="family">Cinar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Eric</namePart>
      <namePart type="family">Gaussier</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Muhammad</namePart>
      <namePart type="family">Imran</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2020-nov</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 24th Conference on Computational Natural Language Learning</title>
      </titleInfo>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This article establishes that, unlike the legacy tf*idf representation, recent natural language representations (word embedding vectors) tend to exhibit a so-called concentration of measure phenomenon, in the sense that, as the representation size $p$ and database size $n$ are both large, their behavior is similar to that of large dimensional Gaussian random vectors. This phenomenon may have important consequences as machine learning algorithms for natural language data could be amenable to improvement, thereby providing new theoretical insights into the field of natural language processing.</abstract>
    <identifier type="citekey">couillet-etal-2020-word</identifier>
    <identifier type="doi">10.18653/v1/2020.conll-1.25</identifier>
    <location>
      <url>https://aclanthology.org/2020.conll-1.25</url>
    </location>
    <part>
      <date>2020-nov</date>
      <extent unit="page">
        <start>325</start>
        <end>334</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Word Representations Concentrate and This is Good News!
%A Couillet, Romain
%A Cinar, Yagmur Gizem
%A Gaussier, Eric
%A Imran, Muhammad
%S Proceedings of the 24th Conference on Computational Natural Language Learning
%D 2020
%8 nov
%I Association for Computational Linguistics
%C Online
%F couillet-etal-2020-word
%X This article establishes that, unlike the legacy tf*idf representation, recent natural language representations (word embedding vectors) tend to exhibit a so-called concentration of measure phenomenon, in the sense that, as the representation size $p$ and database size $n$ are both large, their behavior is similar to that of large dimensional Gaussian random vectors. This phenomenon may have important consequences as machine learning algorithms for natural language data could be amenable to improvement, thereby providing new theoretical insights into the field of natural language processing.
%R 10.18653/v1/2020.conll-1.25
%U https://aclanthology.org/2020.conll-1.25
%U https://doi.org/10.18653/v1/2020.conll-1.25
%P 325-334
Markdown (Informal)
[Word Representations Concentrate and This is Good News!](https://aclanthology.org/2020.conll-1.25) (Couillet et al., CoNLL 2020)
ACL
- Romain Couillet, Yagmur Gizem Cinar, Eric Gaussier, and Muhammad Imran. 2020. Word Representations Concentrate and This is Good News! In Proceedings of the 24th Conference on Computational Natural Language Learning, pages 325–334, Online. Association for Computational Linguistics.