@inproceedings{he-etal-2023-peer,
title = "{PEER}: Pre-training {ELECTRA} Extended by Ranking",
author = "He, Ru and
Wang, Wei and
Huang, Songfang and
Huang, Fei",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2023",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2023.findings-acl.405/",
doi = "10.18653/v1/2023.findings-acl.405",
pages = "6475--6491",
abstract = "The BERT model and its variants have made great achievements in many downstream natural language processing tasks. The achievements of these models, however, demand highly expensive pre-training computation cost. To address this pre-training efficiency issue, the ELECTRA model is proposed to use a discriminator to perform replaced token detection (RTD) task, that is, to classify whether each input token is original or replaced by a generator. The RTD task performed by the ELECTRA accelerates pre-training so substantially, such that it is very challenging to further improve the pre-training efficiency established by the ELECTRA by using or adding other pre-training tasks, as the recent comprehensive study of Bajaj et al. (2022) summarizes. To further advance this pre-training efficiency frontier, in this paper we propose to extend the RTD task into a task of ranking input tokens according to K different quality levels. Essentially, we generalize the binary classifier in the ELECTRA into a K-level ranker to undertake a more precise task with negligible additional computation cost. Our extensive experiments show that our proposed method is able to outperform the state-of-the-art pre-training efficient models including ELECTRA in downstream GLUE tasks given the same computation cost."
}
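
A minimal, hypothetical sketch (not the authors' released code) of the architectural change the abstract describes: ELECTRA's binary replaced-token-detection head versus a K-level ranking head that scores each token against K quality levels. The hidden size, the value of K, and the head shapes below are illustrative assumptions; PEER's actual ranking objective is defined in the paper.

import torch
import torch.nn as nn

class RTDHead(nn.Module):
    # ELECTRA-style discriminator head: one logit per token,
    # trained with binary cross-entropy (original vs. replaced).
    def __init__(self, hidden_size: int):
        super().__init__()
        self.classifier = nn.Linear(hidden_size, 1)

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        return self.classifier(hidden_states).squeeze(-1)  # (batch, seq_len)

class KLevelRankingHead(nn.Module):
    # PEER-style generalization as sketched here: K scores per token,
    # one for each assumed quality level. The exact ranking loss used by
    # PEER is not reproduced in this illustration.
    def __init__(self, hidden_size: int, num_levels: int):
        super().__init__()
        self.ranker = nn.Linear(hidden_size, num_levels)

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        return self.ranker(hidden_states)  # (batch, seq_len, K)

# Usage sketch: 2 sequences of 8 tokens, hidden size 256, K = 4 quality levels.
hidden = torch.randn(2, 8, 256)
rtd_logits = RTDHead(256)(hidden)                # shape (2, 8)
rank_logits = KLevelRankingHead(256, 4)(hidden)  # shape (2, 8, 4)

The only parameter difference is K output units instead of one, which is consistent with the abstract's claim that the extension adds negligible computation cost.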