@inproceedings{wennberg-henter-2024-learned,
title = "Learned Transformer Position Embeddings Have a Low-Dimensional Structure",
author = "Wennberg, Ulme and
Henter, Gustav",
editor = "Zhao, Chen and
Mosbach, Marius and
Atanasova, Pepa and
Goldfarb-Tarrant, Seraphina and
Hase, Peter and
Hosseini, Arian and
Elbayad, Maha and
Pezzelle, Sandro and
Mozes, Maximilian",
booktitle = "Proceedings of the 9th Workshop on Representation Learning for NLP (RepL4NLP-2024)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2024.repl4nlp-1.17/",
pages = "237--244",
abstract = "Position embeddings have long been essential for sequence-order encoding in transformer models, yet their structure is underexplored. This study uses principal component analysis (PCA) to quantitatively compare the dimensionality of absolute position and word embeddings in BERT and ALBERT. We find that, unlike word embeddings, position embeddings occupy a low-dimensional subspace, typically utilizing under 10{\%} of the dimensions available. Additionally, the principal vectors are dominated by a few low-frequency rotational components, a structure arising independently across models."
}
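The analysis described in the abstract (PCA on the learned embedding matrices, counting how many principal components are needed to explain most of the variance) can be sketched in a few lines. The snippet below is an illustrative sketch, not the authors' code: it assumes the Hugging Face `transformers` package, the `bert-base-uncased` checkpoint, and an arbitrary 90% cumulative-variance threshold for counting "used" dimensions; none of these choices are specified in the entry above.

```python
# Sketch (not the authors' implementation): compare the effective PCA
# dimensionality of BERT's learned absolute position embeddings with that
# of its word (token) embeddings.
import numpy as np
from transformers import BertModel

model = BertModel.from_pretrained("bert-base-uncased")

# Learned absolute position embeddings: shape (512, 768) for bert-base.
pos = model.embeddings.position_embeddings.weight.detach().numpy()
# Word (token) embeddings: shape (30522, 768).
tok = model.embeddings.word_embeddings.weight.detach().numpy()

def explained_variance_ratio(x: np.ndarray) -> np.ndarray:
    """PCA via SVD of the mean-centered matrix; per-component variance ratios."""
    x = x - x.mean(axis=0, keepdims=True)
    s = np.linalg.svd(x, compute_uv=False)
    var = s ** 2
    return var / var.sum()

def dims_for(x: np.ndarray, threshold: float = 0.9) -> int:
    """Smallest number of principal components whose cumulative explained
    variance exceeds `threshold` (0.9 is an illustrative choice)."""
    return int(np.searchsorted(np.cumsum(explained_variance_ratio(x)), threshold) + 1)

print("position embeddings:", dims_for(pos), "of", pos.shape[1], "dimensions")
print("word embeddings:    ", dims_for(tok), "of", tok.shape[1], "dimensions")
```

If the abstract's finding holds under these assumptions, the first line should report far fewer than 77 components (10% of the 768 available dimensions) for the position embeddings, while the word embeddings should require substantially more.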