@inproceedings{itoh-shinnou-2021-domain,
title = "Domain-Specific {J}apanese {ELECTRA} Model Using a Small Corpus",
author = "Itoh, Youki and
Shinnou, Hiroyuki",
booktitle = "Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)",
month = sep,
year = "2021",
address = "Held Online",
publisher = "INCOMA Ltd.",
url = "https://aclanthology.org/2021.ranlp-1.72",
pages = "640--646",
abstract = "Recently, domain shift, which affects accuracy due to differences in data between source and target domains, has become a serious issue when using machine learning methods to solve natural language processing tasks. With additional pretraining and fine-tuning using a target domain corpus, pretraining models such as BERT (Bidirectional Encoder Representations from Transformers) can address this issue. However, the additional pretraining of the BERT model is difficult because it requires significant computing resources. The efficiently learning an encoder that classifies token replacements accurately (ELECTRA) pretraining model replaces the BERT pretraining method{'}s masked language modeling with a method called replaced token detection, which improves the computational efficiency and allows the additional pretraining of the model to a practical extent. Herein, we propose a method for addressing the computational efficiency of pretraining models in domain shift by constructing an ELECTRA pretraining model on a Japanese dataset and additional pretraining this model in a downstream task using a corpus from the target domain. We constructed a pretraining model for ELECTRA in Japanese and conducted experiments on a document classification task using data from Japanese news articles. Results show that even a model smaller than the pretrained model performs equally well.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="itoh-shinnou-2021-domain">
<titleInfo>
<title>Domain-Specific Japanese ELECTRA Model Using a Small Corpus</title>
</titleInfo>
<name type="personal">
<namePart type="given">Youki</namePart>
<namePart type="family">Itoh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hiroyuki</namePart>
<namePart type="family">Shinnou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-sep</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)</title>
</titleInfo>
<originInfo>
<publisher>INCOMA Ltd.</publisher>
<place>
<placeTerm type="text">Held Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Recently, domain shift, which affects accuracy due to differences in data between source and target domains, has become a serious issue when using machine learning methods to solve natural language processing tasks. With additional pretraining and fine-tuning using a target domain corpus, pretraining models such as BERT (Bidirectional Encoder Representations from Transformers) can address this issue. However, the additional pretraining of the BERT model is difficult because it requires significant computing resources. The efficiently learning an encoder that classifies token replacements accurately (ELECTRA) pretraining model replaces the BERT pretraining method’s masked language modeling with a method called replaced token detection, which improves the computational efficiency and allows the additional pretraining of the model to a practical extent. Herein, we propose a method for addressing the computational efficiency of pretraining models in domain shift by constructing an ELECTRA pretraining model on a Japanese dataset and additional pretraining this model in a downstream task using a corpus from the target domain. We constructed a pretraining model for ELECTRA in Japanese and conducted experiments on a document classification task using data from Japanese news articles. Results show that even a model smaller than the pretrained model performs equally well.</abstract>
<identifier type="citekey">itoh-shinnou-2021-domain</identifier>
<location>
<url>https://aclanthology.org/2021.ranlp-1.72</url>
</location>
<part>
<date>2021-sep</date>
<extent unit="page">
<start>640</start>
<end>646</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Domain-Specific Japanese ELECTRA Model Using a Small Corpus
%A Itoh, Youki
%A Shinnou, Hiroyuki
%S Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)
%D 2021
%8 sep
%I INCOMA Ltd.
%C Held Online
%F itoh-shinnou-2021-domain
%X Recently, domain shift, which affects accuracy due to differences in data between source and target domains, has become a serious issue when using machine learning methods to solve natural language processing tasks. With additional pretraining and fine-tuning using a target domain corpus, pretraining models such as BERT (Bidirectional Encoder Representations from Transformers) can address this issue. However, the additional pretraining of the BERT model is difficult because it requires significant computing resources. The efficiently learning an encoder that classifies token replacements accurately (ELECTRA) pretraining model replaces the BERT pretraining method’s masked language modeling with a method called replaced token detection, which improves the computational efficiency and allows the additional pretraining of the model to a practical extent. Herein, we propose a method for addressing the computational efficiency of pretraining models in domain shift by constructing an ELECTRA pretraining model on a Japanese dataset and additional pretraining this model in a downstream task using a corpus from the target domain. We constructed a pretraining model for ELECTRA in Japanese and conducted experiments on a document classification task using data from Japanese news articles. Results show that even a model smaller than the pretrained model performs equally well.
%U https://aclanthology.org/2021.ranlp-1.72
%P 640-646
Markdown (Informal)
[Domain-Specific Japanese ELECTRA Model Using a Small Corpus](https://aclanthology.org/2021.ranlp-1.72) (Itoh & Shinnou, RANLP 2021)
ACL