@inproceedings{manginas-etal-2020-layer,
title = "Layer-wise Guided Training for {BERT}: Learning Incrementally Refined Document Representations",
author = "Manginas, Nikolaos and
Chalkidis, Ilias and
Malakasiotis, Prodromos",
booktitle = "Proceedings of the Fourth Workshop on Structured Prediction for NLP",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.spnlp-1.7",
doi = "10.18653/v1/2020.spnlp-1.7",
pages = "53--61",
abstract = "Although BERT is widely used by the NLP community, little is known about its inner workings. Several attempts have been made to shed light on certain aspects of BERT, often with contradicting conclusions. A much raised concern focuses on BERT{'}s over-parameterization and under-utilization issues. To this end, we propose o novel approach to fine-tune BERT in a structured manner. Specifically, we focus on Large Scale Multilabel Text Classification (LMTC) where documents are assigned with one or more labels from a large predefined set of hierarchically organized labels. Our approach guides specific BERT layers to predict labels from specific hierarchy levels. Experimenting with two LMTC datasets we show that this structured fine-tuning approach not only yields better classification results but also leads to better parameter utilization.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="manginas-etal-2020-layer">
<titleInfo>
<title>Layer-wise Guided Training for BERT: Learning Incrementally Refined Document Representations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nikolaos</namePart>
<namePart type="family">Manginas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ilias</namePart>
<namePart type="family">Chalkidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Prodromos</namePart>
<namePart type="family">Malakasiotis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-nov</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Workshop on Structured Prediction for NLP</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Although BERT is widely used by the NLP community, little is known about its inner workings. Several attempts have been made to shed light on certain aspects of BERT, often with contradicting conclusions. A much raised concern focuses on BERT’s over-parameterization and under-utilization issues. To this end, we propose a novel approach to fine-tune BERT in a structured manner. Specifically, we focus on Large Scale Multilabel Text Classification (LMTC) where documents are assigned with one or more labels from a large predefined set of hierarchically organized labels. Our approach guides specific BERT layers to predict labels from specific hierarchy levels. Experimenting with two LMTC datasets we show that this structured fine-tuning approach not only yields better classification results but also leads to better parameter utilization.</abstract>
<identifier type="citekey">manginas-etal-2020-layer</identifier>
<identifier type="doi">10.18653/v1/2020.spnlp-1.7</identifier>
<location>
<url>https://aclanthology.org/2020.spnlp-1.7</url>
</location>
<part>
<date>2020-nov</date>
<extent unit="page">
<start>53</start>
<end>61</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Layer-wise Guided Training for BERT: Learning Incrementally Refined Document Representations
%A Manginas, Nikolaos
%A Chalkidis, Ilias
%A Malakasiotis, Prodromos
%S Proceedings of the Fourth Workshop on Structured Prediction for NLP
%D 2020
%8 nov
%I Association for Computational Linguistics
%C Online
%F manginas-etal-2020-layer
%X Although BERT is widely used by the NLP community, little is known about its inner workings. Several attempts have been made to shed light on certain aspects of BERT, often with contradicting conclusions. A much raised concern focuses on BERT’s over-parameterization and under-utilization issues. To this end, we propose a novel approach to fine-tune BERT in a structured manner. Specifically, we focus on Large Scale Multilabel Text Classification (LMTC) where documents are assigned with one or more labels from a large predefined set of hierarchically organized labels. Our approach guides specific BERT layers to predict labels from specific hierarchy levels. Experimenting with two LMTC datasets we show that this structured fine-tuning approach not only yields better classification results but also leads to better parameter utilization.
%R 10.18653/v1/2020.spnlp-1.7
%U https://aclanthology.org/2020.spnlp-1.7
%U https://doi.org/10.18653/v1/2020.spnlp-1.7
%P 53-61
Markdown (Informal)
[Layer-wise Guided Training for BERT: Learning Incrementally Refined Document Representations](https://aclanthology.org/2020.spnlp-1.7) (Manginas et al., spnlp 2020)
ACL
Nikolaos Manginas, Ilias Chalkidis, and Prodromos Malakasiotis. 2020. [Layer-wise Guided Training for BERT: Learning Incrementally Refined Document Representations](https://aclanthology.org/2020.spnlp-1.7). In *Proceedings of the Fourth Workshop on Structured Prediction for NLP*, pages 53–61, Online. Association for Computational Linguistics.
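
The abstract only sketches the core idea at a high level, so below is a minimal, hypothetical illustration (not the authors' released code) of what layer-wise guided training could look like with the HuggingFace `transformers` API: each hierarchy level gets its own multi-label head attached to a different intermediate BERT layer, and the per-level losses are summed during fine-tuning. The layer indices (4, 8, 12), label counts, and all class/function names here are illustrative assumptions, not taken from the paper.

```python
import torch
import torch.nn as nn
from transformers import BertModel, BertTokenizerFast


class LayerGuidedBert(nn.Module):
    """BERT with one multi-label head per hierarchy level, each attached
    to a different encoder layer (shallow layers -> coarse labels)."""

    def __init__(self, level_to_layer=None, labels_per_level=None,
                 model_name="bert-base-uncased"):
        super().__init__()
        # Illustrative defaults: 3 hierarchy levels read from layers 4, 8, 12.
        self.level_to_layer = level_to_layer or {1: 4, 2: 8, 3: 12}
        labels_per_level = labels_per_level or {1: 20, 2: 120, 3: 4000}
        self.bert = BertModel.from_pretrained(model_name)
        hidden = self.bert.config.hidden_size
        self.heads = nn.ModuleDict(
            {str(level): nn.Linear(hidden, n_labels)
             for level, n_labels in labels_per_level.items()}
        )

    def forward(self, input_ids, attention_mask):
        out = self.bert(input_ids=input_ids, attention_mask=attention_mask,
                        output_hidden_states=True)
        # hidden_states[0] is the embedding output; index i is layer i's output.
        logits = {}
        for level, layer_idx in self.level_to_layer.items():
            cls_vec = out.hidden_states[layer_idx][:, 0]  # [CLS] at that layer
            logits[level] = self.heads[str(level)](cls_vec)
        return logits


# Usage sketch: binary cross-entropy summed over the hierarchy levels.
if __name__ == "__main__":
    tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")
    model = LayerGuidedBert()
    batch = tokenizer(["an example document"], return_tensors="pt",
                      padding=True, truncation=True)
    logits = model(batch["input_ids"], batch["attention_mask"])
    # Dummy all-zero targets, just to show the loss shape; real targets are
    # multi-hot vectors of the labels assigned at each hierarchy level.
    targets = {lvl: torch.zeros_like(t) for lvl, t in logits.items()}
    loss = sum(nn.BCEWithLogitsLoss()(logits[lvl], targets[lvl])
               for lvl in logits)
    loss.backward()
```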