@inproceedings{nagatsuka-etal-2021-pre,
title = "Pre-training a {BERT} with Curriculum Learning by Increasing Block-Size of Input Text",
author = "Nagatsuka, Koichi and
Broni-Bediako, Clifford and
Atsumi, Masayasu",
editor = "Mitkov, Ruslan and
Angelova, Galia",
booktitle = "Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)",
month = sep,
year = "2021",
address = "Held Online",
publisher = "INCOMA Ltd.",
url = "https://preview.aclanthology.org/fix-sig-urls/2021.ranlp-1.112/",
pages = "989--996",
abstract = "Recently, pre-trained language representation models such as BERT and RoBERTa have achieved significant results in a wide range of natural language processing (NLP) tasks, however, it requires extremely high computational cost. Curriculum Learning (CL) is one of the potential solutions to alleviate this problem. CL is a training strategy where training samples are given to models in a meaningful order instead of random sampling. In this work, we propose a new CL method which gradually increases the block-size of input text for training the self-attention mechanism of BERT and its variants using the maximum available batch-size. Experiments in low-resource settings show that our approach outperforms the baseline in terms of convergence speed and final performance on downstream tasks."
}
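
The abstract describes the method at a high level: pre-train on short input blocks first, then on progressively longer ones, always using the largest batch size that fits a fixed token budget. The Python sketch below illustrates one way such a block-size curriculum could be scheduled; the stage block sizes, the token budget, and all names (`BLOCK_SIZE_SCHEDULE`, `curriculum_stages`, etc.) are illustrative assumptions, not details taken from the paper.

```python
# Hypothetical sketch of a block-size curriculum for BERT pre-training.
# Stage boundaries, block sizes, and the token budget are illustrative
# assumptions, not values from Nagatsuka et al. (2021).

from typing import Iterator, List

BLOCK_SIZE_SCHEDULE = [64, 128, 256, 512]   # tokens per input block, short to long
TOKENS_PER_BATCH = 65_536                   # fixed token budget per training step


def batch_size_for(block_size: int) -> int:
    """Use the maximum batch size that fits the fixed token budget."""
    return TOKENS_PER_BATCH // block_size


def chunk_corpus(token_ids: List[int], block_size: int) -> Iterator[List[int]]:
    """Split a flat stream of token ids into contiguous, non-overlapping blocks."""
    for start in range(0, len(token_ids) - block_size + 1, block_size):
        yield token_ids[start:start + block_size]


def curriculum_stages(token_ids: List[int]):
    """Yield (block_size, batch_size, blocks) for each curriculum stage."""
    for block_size in BLOCK_SIZE_SCHEDULE:
        blocks = list(chunk_corpus(token_ids, block_size))
        yield block_size, batch_size_for(block_size), blocks


if __name__ == "__main__":
    fake_corpus = list(range(10_000))       # stand-in for a tokenized corpus
    for block_size, batch_size, blocks in curriculum_stages(fake_corpus):
        print(f"stage: block_size={block_size:4d} "
              f"batch_size={batch_size:4d} num_blocks={len(blocks)}")
```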