@inproceedings{hosseini-etal-2023-bert,
title = "{BERT} Has More to Offer: {BERT} Layers Combination Yields Better Sentence Embeddings",
author = "Hosseini, MohammadSaleh and
Munia, Munawara and
Khan, Latifur",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2023.findings-emnlp.1030/",
doi = "10.18653/v1/2023.findings-emnlp.1030",
pages = "15419--15431",
    abstract = "Obtaining sentence representations from BERT-based models as feature extractors is invaluable as it takes much less time to pre-compute a one-time representation of the data and then use it for the downstream tasks, rather than fine-tune the whole BERT. Most previous works acquire a sentence's representation by passing it to BERT and averaging its last layer. In this paper, we propose that the combination of certain layers of a BERT-based model rested on the data set and model can achieve substantially better results. We empirically show the effectiveness of our method for different BERT-based models on different tasks and data sets. Specifically, on seven standard semantic textual similarity data sets, we outperform the baseline BERT by improving the Spearman's correlation by up to 25.75{\%} and on average 16.32{\%} without any further training. We also achieved state-of-the-art results on eight transfer data sets by reducing the relative error by up to 37.41{\%} and on average 17.92{\%}."
}
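A minimal sketch of the idea described in the abstract, not the authors' released code: instead of mean-pooling only BERT's last layer, mean-pool an average of several hidden layers. The model name, function name, and the layer indices chosen here are illustrative assumptions, not the combination reported in the paper.

```python
import torch
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased", output_hidden_states=True)
model.eval()

def sentence_embedding(sentence, layers=(0, 12)):
    """Mean-pool token vectors averaged over a chosen combination of hidden layers."""
    inputs = tokenizer(sentence, return_tensors="pt", truncation=True)
    with torch.no_grad():
        # hidden_states is a tuple of 13 tensors for bert-base:
        # the embedding layer output plus one tensor per transformer layer.
        hidden_states = model(**inputs).hidden_states
    # Average the selected layers, then mean-pool over non-padding tokens.
    combined = torch.stack([hidden_states[i] for i in layers]).mean(dim=0)
    mask = inputs["attention_mask"].unsqueeze(-1)
    return (combined * mask).sum(dim=1) / mask.sum(dim=1)

emb = sentence_embedding("BERT layers can be combined.")
print(emb.shape)  # torch.Size([1, 768])
```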