@inproceedings{zhuang-etal-2021-robustly,
title = "A Robustly Optimized {BERT} Pre-training Approach with Post-training",
author = "Zhuang, Liu and
Wayne, Lin and
Ya, Shi and
Jun, Zhao",
editor = "Li, Sheng and
Sun, Maosong and
Liu, Yang and
Wu, Hua and
Liu, Kang and
Che, Wanxiang and
He, Shizhu and
Rao, Gaoqi",
booktitle = "Proceedings of the 20th Chinese National Conference on Computational Linguistics",
month = aug,
year = "2021",
address = "Huhhot, China",
publisher = "Chinese Information Processing Society of China",
url = "https://preview.aclanthology.org/fix-sig-urls/2021.ccl-1.108/",
pages = "1218--1227",
language = "eng",
abstract = "In the paper we present a `pre-training'+{`}post-training'+{`}fine-tuning' three-stage paradigm which is a supplementary framework for the standard `pre-training'+{`}fine-tuning' languagemodel approach. Furthermore based on three-stage paradigm we present a language modelnamed PPBERT. Compared with original BERT architecture that is based on the standard two-stage paradigm we do not fine-tune pre-trained model directly but rather post-train it on the domain or task related dataset first which helps to better incorporate task-awareness knowl-edge and domain-awareness knowledge within pre-trained model also from the training datasetreduce bias. Extensive experimental results indicate that proposed model improves the perfor-mance of the baselines on 24 NLP tasks which includes eight GLUE benchmarks eight Su-perGLUE benchmarks six extractive question answering benchmarks. More remarkably our proposed model is a more flexible and pluggable model where post-training approach is able to be plugged into other PLMs that are based on BERT. Extensive ablations further validate the effectiveness and its state-of-the-art (SOTA) performance. The open source code pre-trained models and post-trained models are available publicly."
}
[A Robustly Optimized BERT Pre-training Approach with Post-training](https://preview.aclanthology.org/fix-sig-urls/2021.ccl-1.108/) (Zhuang et al., CCL 2021)
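For readers who want a concrete picture of the three-stage paradigm the abstract describes, the snippet below is a minimal sketch written against the Hugging Face `transformers` and `datasets` APIs. It is not the authors' released PPBERT code: the `bert-base-uncased` checkpoint, the toy corpora, the output directories, and the hyperparameters are all illustrative assumptions; only the staging (reuse a pre-trained model, post-train it with the MLM objective on domain/task data, then fine-tune on the labeled task) follows the abstract.

```python
# Hypothetical sketch of "pre-training + post-training + fine-tuning" as
# described in the PPBERT abstract; not the authors' released implementation.
from datasets import Dataset
from transformers import (
    AutoModelForMaskedLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)

# Stage 1 (pre-training) is reused: start from a publicly pre-trained BERT.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

def tokenize(batch):
    return tokenizer(batch["text"], truncation=True, max_length=64)

# Toy unlabeled domain/task-related corpus for post-training (assumption).
domain_dataset = Dataset.from_dict(
    {"text": ["the contrast agent highlighted the lesion",
              "the biopsy result was benign"]}
).map(tokenize, batched=True)

# Toy labeled downstream task data for fine-tuning (assumption).
task_dataset = Dataset.from_dict(
    {"text": ["report suggests malignancy", "no abnormality detected"],
     "labels": [1, 0]}
).map(tokenize, batched=True)

# Stage 2: post-train the pre-trained model with the MLM objective on the
# domain corpus, injecting domain/task awareness before supervised training.
mlm_model = AutoModelForMaskedLM.from_pretrained("bert-base-uncased")
Trainer(
    model=mlm_model,
    args=TrainingArguments(output_dir="post_trained_bert", num_train_epochs=1),
    train_dataset=domain_dataset,
    data_collator=DataCollatorForLanguageModeling(
        tokenizer=tokenizer, mlm_probability=0.15
    ),
).train()
mlm_model.save_pretrained("post_trained_bert")
tokenizer.save_pretrained("post_trained_bert")

# Stage 3: fine-tune the post-trained checkpoint on the labeled task.
clf_model = AutoModelForSequenceClassification.from_pretrained(
    "post_trained_bert", num_labels=2
)
Trainer(
    model=clf_model,
    args=TrainingArguments(output_dir="fine_tuned_bert", num_train_epochs=3),
    train_dataset=task_dataset,
).train()
```

Because the post-training stage only adds an extra MLM pass over unlabeled domain data, the same recipe can in principle be pointed at other BERT-based checkpoints, which is the "pluggable" property the abstract claims.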