@inproceedings{jauregi-unanue-piccardi-2020-pretrained,
    title     = {Pretrained Language Models and Backtranslation for {English}-{Basque} Biomedical Neural Machine Translation},
    author    = {Jauregi Unanue, Inigo and
                 Piccardi, Massimo},
    editor    = {Barrault, Lo{\"i}c and
                 Bojar, Ond{\v{r}}ej and
                 Bougares, Fethi and
                 Chatterjee, Rajen and
                 Costa-juss{\`a}, Marta R. and
                 Federmann, Christian and
                 Fishel, Mark and
                 Fraser, Alexander and
                 Graham, Yvette and
                 Guzman, Paco and
                 Haddow, Barry and
                 Huck, Matthias and
                 Jimeno Yepes, Antonio and
                 Koehn, Philipp and
                 Martins, Andr{\'e} and
                 Morishita, Makoto and
                 Monz, Christof and
                 Nagata, Masaaki and
                 Nakazawa, Toshiaki and
                 Negri, Matteo},
    booktitle = {Proceedings of the Fifth Conference on Machine Translation},
    month     = nov,
    year      = {2020},
    address   = {Online},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2020.wmt-1.89/},
    pages     = {826--832},
    abstract  = {This paper describes the machine translation systems proposed by the University of Technology Sydney Natural Language Processing (UTS{\_}NLP) team for the WMT20 English-Basque biomedical translation tasks. Due to the limited parallel corpora available, we have proposed to train a BERT-fused NMT model that leverages the use of pretrained language models. Furthermore, we have augmented the training corpus by backtranslating monolingual data. Our experiments show that NMT models in low-resource scenarios can benefit from combining these two training techniques, with improvements of up to 6.16 BLEU percentual points in the case of biomedical abstract translations.}
}
Markdown (Informal)
[Pretrained Language Models and Backtranslation for English-Basque Biomedical Neural Machine Translation](https://aclanthology.org/2020.wmt-1.89/) (Jauregi Unanue & Piccardi, WMT 2020)
ACL