@inproceedings{zhang-2022-language,
title = "Language Model Decomposition: Quantifying the Dependency and Correlation of Language Models",
author = "Zhang, Hao",
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2022.emnlp-main.161/",
doi = "10.18653/v1/2022.emnlp-main.161",
pages = "2508--2517",
abstract = "Pre-trained language models (LMs), such as BERT (Devlin et al., 2018) and its variants, have led to significant improvements on various NLP tasks in past years. However, a theoretical framework for studying their relationships is still missing. In this paper, we fill this gap by investigating the linear dependency between pre-trained LMs. The linear dependency of LMs is defined analogously to the linear dependency of vectors. We propose Language Model Decomposition (LMD) to represent a LM using a linear combination of other LMs as basis, and derive the closed-form solution. A goodness-of-fit metric for LMD similar to the coefficient of determination is defined and used to measure the linear dependency of a set of LMs. In experiments, we find that BERT and eleven (11) BERT-like LMs are 91{\%} linearly dependent. This observation suggests that current state-of-the-art (SOTA) LMs are highly ``correlated''. To further advance SOTA we need more diverse and novel LMs that are less dependent on existing LMs."
}
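
The abstract sketches LMD as a linear-algebraic fit: a target LM's representations are approximated by a linear combination of other LMs' representations (with a closed-form least-squares solution), and the quality of the fit is scored with an R²-like metric. The snippet below is a minimal illustrative sketch of that idea only, assuming sentence-level representation matrices and ordinary least squares; the array shapes, random data, and variable names are assumptions for illustration and are not the paper's exact formulation.

```python
import numpy as np

# Illustrative sketch (not the paper's code): approximate a target LM's
# representations T as a linear combination of k basis LMs' representations,
# then compute an R^2-style goodness-of-fit score.

rng = np.random.default_rng(0)
n, d, k = 1000, 64, 3            # n samples, d-dim representations, k basis LMs

T = rng.normal(size=(n, d))                      # target LM representations (placeholder data)
B = [rng.normal(size=(n, d)) for _ in range(k)]  # basis LMs' representations (placeholder data)

# Concatenate the basis representations per sample, so one coefficient
# matrix W of shape (k*d, d) maps the combined basis onto the target.
X = np.concatenate(B, axis=1)                    # shape (n, k*d)

# Closed-form least-squares fit, W = argmin ||X W - T||_F (solved via lstsq).
W, *_ = np.linalg.lstsq(X, T, rcond=None)
T_hat = X @ W

# Goodness of fit analogous to the coefficient of determination:
# 1 - residual sum of squares / total sum of squares.
ss_res = np.sum((T - T_hat) ** 2)
ss_tot = np.sum((T - T.mean(axis=0)) ** 2)
r2 = 1.0 - ss_res / ss_tot
print(f"R^2-like goodness of fit: {r2:.3f}")
```

With real LM representations in place of the random placeholders, a score near 1 would indicate that the target LM is nearly a linear combination of the basis LMs, which is the sense in which the paper reports BERT-like models being highly linearly dependent.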