@inproceedings{rep-etal-2024-electras,
    title = "Are {ELECTRA}'s Sentence Embeddings Beyond Repair? The Case of Semantic Textual Similarity",
    author = "Rep, Ivan and
      Duki{\'c}, David and
      {\v{S}}najder, Jan",
    editor = "Al-Onaizan, Yaser and
      Bansal, Mohit and
      Chen, Yun-Nung",
    booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
    month = nov,
    year = "2024",
    address = "Miami, Florida, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.findings-emnlp.535/",
    doi = "10.18653/v1/2024.findings-emnlp.535",
    pages = "9159--9169",
    abstract = "While BERT produces high-quality sentence embeddings, its pre-training computational cost is a significant drawback. In contrast, ELECTRA provides a cost-effective pre-training objective and downstream task performance improvements, but worse sentence embeddings. The community tacitly stopped utilizing ELECTRA's sentence embeddings for semantic textual similarity (STS). We notice a significant drop in performance for the ELECTRA discriminator's last layer in comparison to prior layers. We explore this drop and propose a way to repair the embeddings using a novel truncated model fine-tuning (TMFT) method. TMFT improves the Spearman correlation coefficient by over 8 points while increasing parameter efficiency on the STS Benchmark. We extend our analysis to various model sizes, languages, and two other tasks. Further, we discover the surprising efficacy of ELECTRA's generator model, which performs on par with BERT, using significantly fewer parameters and a substantially smaller embedding size. Finally, we observe boosts by combining TMFT with word similarity or domain adaptive pre-training."
}
Markdown (Informal)
[Are ELECTRA's Sentence Embeddings Beyond Repair? The Case of Semantic Textual Similarity](https://aclanthology.org/2024.findings-emnlp.535/) (Rep et al., Findings 2024)
ACL