@inproceedings{shavarani-sarkar-2021-better,
title = "Better Neural Machine Translation by Extracting Linguistic Information from {BERT}",
author = "Shavarani, Hassan S. and
Sarkar, Anoop",
booktitle = "Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume",
month = apr,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.eacl-main.241",
doi = "10.18653/v1/2021.eacl-main.241",
pages = "2772--2783",
abstract = "Adding linguistic information (syntax or semantics) to neural machine translation (NMT) have mostly focused on using point estimates from pre-trained models. Directly using the capacity of massive pre-trained contextual word embedding models such as BERT(Devlin et al., 2019) has been marginally useful in NMT because effective fine-tuning is difficult to obtain for NMT without making training brittle and unreliable. We augment NMT by extracting dense fine-tuned vector-based linguistic information from BERT instead of using point estimates. Experimental results show that our method of incorporating linguistic information helps NMT to generalize better in a variety of training contexts and is no more difficult to train than conventional Transformer-based NMT.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="shavarani-sarkar-2021-better">
<titleInfo>
<title>Better Neural Machine Translation by Extracting Linguistic Information from BERT</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hassan</namePart>
<namePart type="given">S</namePart>
<namePart type="family">Shavarani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anoop</namePart>
<namePart type="family">Sarkar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-apr</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Adding linguistic information (syntax or semantics) to neural machine translation (NMT) has mostly focused on using point estimates from pre-trained models. Directly using the capacity of massive pre-trained contextual word embedding models such as BERT (Devlin et al., 2019) has been marginally useful in NMT because effective fine-tuning is difficult to obtain for NMT without making training brittle and unreliable. We augment NMT by extracting dense fine-tuned vector-based linguistic information from BERT instead of using point estimates. Experimental results show that our method of incorporating linguistic information helps NMT to generalize better in a variety of training contexts and is no more difficult to train than conventional Transformer-based NMT.</abstract>
<identifier type="citekey">shavarani-sarkar-2021-better</identifier>
<identifier type="doi">10.18653/v1/2021.eacl-main.241</identifier>
<location>
<url>https://aclanthology.org/2021.eacl-main.241</url>
</location>
<part>
<date>2021-apr</date>
<extent unit="page">
<start>2772</start>
<end>2783</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Better Neural Machine Translation by Extracting Linguistic Information from BERT
%A Shavarani, Hassan S.
%A Sarkar, Anoop
%S Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume
%D 2021
%8 apr
%I Association for Computational Linguistics
%C Online
%F shavarani-sarkar-2021-better
%X Adding linguistic information (syntax or semantics) to neural machine translation (NMT) has mostly focused on using point estimates from pre-trained models. Directly using the capacity of massive pre-trained contextual word embedding models such as BERT (Devlin et al., 2019) has been marginally useful in NMT because effective fine-tuning is difficult to obtain for NMT without making training brittle and unreliable. We augment NMT by extracting dense fine-tuned vector-based linguistic information from BERT instead of using point estimates. Experimental results show that our method of incorporating linguistic information helps NMT to generalize better in a variety of training contexts and is no more difficult to train than conventional Transformer-based NMT.
%R 10.18653/v1/2021.eacl-main.241
%U https://aclanthology.org/2021.eacl-main.241
%U https://doi.org/10.18653/v1/2021.eacl-main.241
%P 2772-2783
Markdown (Informal)
[Better Neural Machine Translation by Extracting Linguistic Information from BERT](https://aclanthology.org/2021.eacl-main.241) (Shavarani & Sarkar, EACL 2021)
ACL
Hassan S. Shavarani and Anoop Sarkar. 2021. Better Neural Machine Translation by Extracting Linguistic Information from BERT. In Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume, pages 2772–2783, Online. Association for Computational Linguistics.