@inproceedings{kaneko-bollegala-2020-autoencoding,
title = "Autoencoding Improves Pre-trained Word Embeddings",
author = "Kaneko, Masahiro and
Bollegala, Danushka",
editor = "Scott, Donia and
Bel, Nuria and
Zong, Chengqing",
booktitle = "Proceedings of the 28th International Conference on Computational Linguistics",
month = dec,
year = "2020",
address = "Barcelona, Spain (Online)",
publisher = "International Committee on Computational Linguistics",
url = "https://preview.aclanthology.org/add-emnlp-2024-awards/2020.coling-main.149/",
doi = "10.18653/v1/2020.coling-main.149",
pages = "1699--1713",
abstract = "Prior works investigating the geometry of pre-trained word embeddings have shown that word embeddings to be distributed in a narrow cone and by centering and projecting using principal component vectors one can increase the accuracy of a given set of pre-trained word embeddings. However, theoretically, this post-processing step is equivalent to applying a linear autoencoder to minimize the squared L2 reconstruction error. This result contradicts prior work (Mu and Viswanath, 2018) that proposed to remove the top principal components from pre-trained embeddings. We experimentally verify our theoretical claims and show that retaining the top principal components is indeed useful for improving pre-trained word embeddings, without requiring access to additional linguistic resources or labeled data."
}
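
As a quick illustration of the abstract's central claim, here is a minimal NumPy sketch (not the authors' released code; the matrix sizes, spectrum, and training hyperparameters are all illustrative assumptions): it centers a random stand-in embedding matrix, projects it onto its top principal components, and checks that a tied-weight linear autoencoder trained to minimize the squared L2 reconstruction error converges to the same reconstruction.

import numpy as np

rng = np.random.default_rng(0)

# Stand-in for pre-trained embeddings: 500 "words" in 50 dimensions,
# with a decaying spectrum so the top principal subspace is well defined.
n, d, k = 500, 50, 5
E = rng.normal(size=(n, d)) * (0.85 ** np.arange(d))

# Post-processing via PCA: center, then reconstruct from the top-k
# principal components (retaining, not removing, the top ones).
Ec = E - E.mean(axis=0)
_, _, Vt = np.linalg.svd(Ec, full_matrices=False)
V = Vt[:k].T                  # (d, k) top-k principal directions
E_pca = Ec @ V @ V.T

# For contrast, a simplified all-but-the-top (Mu and Viswanath, 2018)
# post-processing would instead *remove* that projection:
E_abtt = Ec - E_pca           # shown only for the contrast, unused below

# A tied-weight linear autoencoder trained by gradient descent to minimize
# the squared L2 reconstruction error ||Ec W W^T - Ec||_F^2 converges to a
# projection onto the same top-k principal subspace.
W = 0.01 * rng.normal(size=(d, k))
lr = 0.05                     # toy hyperparameters, not from the paper
for _ in range(2000):
    R = Ec @ W @ W.T - Ec     # reconstruction residual
    W -= lr * 2.0 * (Ec.T @ (R @ W) + R.T @ (Ec @ W)) / n
E_ae = Ec @ W @ W.T

# Relative difference between the two reconstructions; should be close
# to zero once the autoencoder has converged.
print(np.linalg.norm(E_pca - E_ae) / np.linalg.norm(E_pca))

Once training has converged, the printed relative difference is near zero, mirroring the paper's point that minimizing the reconstruction error retains, rather than removes, the top principal components.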