% Riedl, Steuer and Biemann (LREC 2014): distributional thesaurus and sense
% clusters computed on the Google Syntactic N-grams.
% Notes for maintainers:
%   - Proper nouns in the title are brace-protected as whole words so that
%     sentence-casing styles keep their capitals.
%   - The booktitle uses an apostrophe (not a backtick) before "14"; a backtick
%     would be typeset as an opening quote.
%   - The url is the canonical ACL Anthology page for L14-1249 rather than a
%     per-branch preview deployment.
@inproceedings{riedl-etal-2014-distributed,
  title     = {Distributed Distributional Similarities of {Google} {Books} Over the Centuries},
  author    = {Riedl, Martin and
               Steuer, Richard and
               Biemann, Chris},
  editor    = {Calzolari, Nicoletta and
               Choukri, Khalid and
               Declerck, Thierry and
               Loftsson, Hrafn and
               Maegaard, Bente and
               Mariani, Joseph and
               Moreno, Asuncion and
               Odijk, Jan and
               Piperidis, Stelios},
  booktitle = {Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)},
  month     = may,
  year      = {2014},
  address   = {Reykjavik, Iceland},
  publisher = {European Language Resources Association (ELRA)},
  url       = {https://aclanthology.org/L14-1249/},
  pages     = {1401--1405},
  abstract  = {This paper introduces a distributional thesaurus and sense clusters computed on the complete Google Syntactic N-grams, which is extracted from Google Books, a very large corpus of digitized books published between 1520 and 2008. We show that a thesaurus computed on such a large text basis leads to much better results than using smaller corpora like Wikipedia. We also provide distributional thesauri for equal-sized time slices of the corpus. While distributional thesauri can be used as lexical resources in NLP tasks, comparing word similarities over time can unveil sense change of terms across different decades or centuries, and can serve as a resource for diachronic lexicography. Thesauri and clusters are available for download.},
}
Markdown (Informal)
[Distributed Distributional Similarities of Google Books Over the Centuries](https://aclanthology.org/L14-1249/) (Riedl et al., LREC 2014)
ACL