@inproceedings{wang-etal-2020-extending,
title = "Extending Multilingual {BERT} to Low-Resource Languages",
author = "Wang, Zihan and
K, Karthikeyan and
Mayhew, Stephen and
Roth, Dan",
editor = "Cohn, Trevor and
He, Yulan and
Liu, Yang",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2020",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2020.findings-emnlp.240/",
doi = "10.18653/v1/2020.findings-emnlp.240",
pages = "2649--2656",
abstract = "Multilingual BERT (M-BERT) has been a huge success in both supervised and zero-shot cross-lingual transfer learning. However, this success is focused only on the top 104 languages in Wikipedia it was trained on. In this paper, we propose a simple but effective approach to extend M-BERT E-MBERT so it can benefit any new language, and show that our approach aids languages that are already in M-BERT as well. We perform an extensive set of experiments with Named Entity Recognition (NER) on 27 languages, only 16 of which are in M-BERT, and show an average increase of about 6{\%} F1 on M-BERT languages and 23{\%} F1 increase on new languages. We release models and code at \url{http://cogcomp.org/page/publication_view/912}."
}