@inproceedings{nagatsuka-etal-2021-pre,
title = "Pre-training a {BERT} with Curriculum Learning by Increasing Block-Size of Input Text",
author = "Nagatsuka, Koichi and
Broni-Bediako, Clifford and
Atsumi, Masayasu",
booktitle = "Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)",
month = sep,
year = "2021",
address = "Held Online",
publisher = "INCOMA Ltd.",
url = "https://aclanthology.org/2021.ranlp-1.112",
pages = "989--996",
abstract = "Recently, pre-trained language representation models such as BERT and RoBERTa have achieved significant results in a wide range of natural language processing (NLP) tasks, however, it requires extremely high computational cost. Curriculum Learning (CL) is one of the potential solutions to alleviate this problem. CL is a training strategy where training samples are given to models in a meaningful order instead of random sampling. In this work, we propose a new CL method which gradually increases the block-size of input text for training the self-attention mechanism of BERT and its variants using the maximum available batch-size. Experiments in low-resource settings show that our approach outperforms the baseline in terms of convergence speed and final performance on downstream tasks.",
}
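The abstract above describes a curriculum that trains on progressively longer input blocks, using the largest batch that fits at each block size. The following is a minimal, hypothetical sketch of such a schedule; the stage boundaries, block sizes, batch sizes, and helper names are illustrative assumptions, not the authors' code or values.

```python
# Hypothetical block-size curriculum for BERT-style pre-training.
# All concrete numbers below are assumptions for illustration only.
from dataclasses import dataclass

@dataclass
class CurriculumStage:
    block_size: int   # tokens per training example at this stage
    batch_size: int   # largest batch that fits in memory at this block size
    num_steps: int    # optimizer steps to spend at this stage

# Shorter blocks permit larger batches; later stages use longer blocks.
SCHEDULE = [
    CurriculumStage(block_size=64,  batch_size=256, num_steps=10_000),
    CurriculumStage(block_size=128, batch_size=128, num_steps=10_000),
    CurriculumStage(block_size=256, batch_size=64,  num_steps=10_000),
    CurriculumStage(block_size=512, batch_size=32,  num_steps=10_000),
]

def run_curriculum(train_step, make_loader):
    """Train through the stages in order of increasing block size.

    train_step(batch) performs one optimizer step; make_loader(block_size,
    batch_size) yields batches of examples chunked to block_size tokens.
    Both are placeholders for the user's own training code.
    """
    for stage in SCHEDULE:
        loader = make_loader(stage.block_size, stage.batch_size)
        for _, batch in zip(range(stage.num_steps), loader):
            train_step(batch)
```

The intent of such a schedule is that early, cheap stages (short blocks, large batches) let the self-attention layers converge quickly before the expensive long-sequence stages begin.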
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nagatsuka-etal-2021-pre">
<titleInfo>
<title>Pre-training a BERT with Curriculum Learning by Increasing Block-Size of Input Text</title>
</titleInfo>
<name type="personal">
<namePart type="given">Koichi</namePart>
<namePart type="family">Nagatsuka</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Clifford</namePart>
<namePart type="family">Broni-Bediako</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Masayasu</namePart>
<namePart type="family">Atsumi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-sep</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)</title>
</titleInfo>
<originInfo>
<publisher>INCOMA Ltd.</publisher>
<place>
<placeTerm type="text">Held Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
    <abstract>Recently, pre-trained language representation models such as BERT and RoBERTa have achieved significant results in a wide range of natural language processing (NLP) tasks; however, pre-training them requires an extremely high computational cost. Curriculum Learning (CL) is one of the potential solutions to alleviate this problem. CL is a training strategy in which training samples are given to models in a meaningful order instead of by random sampling. In this work, we propose a new CL method which gradually increases the block-size of the input text used to train the self-attention mechanism of BERT and its variants with the maximum available batch-size. Experiments in low-resource settings show that our approach outperforms the baseline in terms of convergence speed and final performance on downstream tasks.</abstract>
<identifier type="citekey">nagatsuka-etal-2021-pre</identifier>
<location>
<url>https://aclanthology.org/2021.ranlp-1.112</url>
</location>
<part>
<date>2021-sep</date>
<extent unit="page">
<start>989</start>
<end>996</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Pre-training a BERT with Curriculum Learning by Increasing Block-Size of Input Text
%A Nagatsuka, Koichi
%A Broni-Bediako, Clifford
%A Atsumi, Masayasu
%S Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)
%D 2021
%8 sep
%I INCOMA Ltd.
%C Held Online
%F nagatsuka-etal-2021-pre
%X Recently, pre-trained language representation models such as BERT and RoBERTa have achieved significant results in a wide range of natural language processing (NLP) tasks; however, pre-training them requires an extremely high computational cost. Curriculum Learning (CL) is one of the potential solutions to alleviate this problem. CL is a training strategy in which training samples are given to models in a meaningful order instead of by random sampling. In this work, we propose a new CL method which gradually increases the block-size of the input text used to train the self-attention mechanism of BERT and its variants with the maximum available batch-size. Experiments in low-resource settings show that our approach outperforms the baseline in terms of convergence speed and final performance on downstream tasks.
%U https://aclanthology.org/2021.ranlp-1.112
%P 989-996
Markdown (Informal)
[Pre-training a BERT with Curriculum Learning by Increasing Block-Size of Input Text](https://aclanthology.org/2021.ranlp-1.112) (Nagatsuka et al., RANLP 2021)
ACL
Koichi Nagatsuka, Clifford Broni-Bediako, and Masayasu Atsumi. 2021. Pre-training a BERT with Curriculum Learning by Increasing Block-Size of Input Text. In Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021), pages 989–996, Held Online. INCOMA Ltd.