@inproceedings{libovicky-etal-2020-language,
title = "On the Language Neutrality of Pre-trained Multilingual Representations",
author = "Libovick{\'y}, Jind{\v{r}}ich and
Rosa, Rudolf and
Fraser, Alexander",
editor = "Cohn, Trevor and
He, Yulan and
Liu, Yang",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2020",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/Author-page-Marten-During-lu/2020.findings-emnlp.150/",
doi = "10.18653/v1/2020.findings-emnlp.150",
pages = "1663--1674",
abstract = "Multilingual contextual embeddings, such as multilingual BERT and XLM-RoBERTa, have proved useful for many multi-lingual tasks. Previous work probed the cross-linguality of the representations indirectly using zero-shot transfer learning on morphological and syntactic tasks. We instead investigate the language-neutrality of multilingual contextual embeddings directly and with respect to lexical semantics. Our results show that contextual embeddings are more language-neutral and, in general, more informative than aligned static word-type embeddings, which are explicitly trained for language neutrality. Contextual embeddings are still only moderately language-neutral by default, so we propose two simple methods for achieving stronger language neutrality: first, by unsupervised centering of the representation for each language and second, by fitting an explicit projection on small parallel data. Besides, we show how to reach state-of-the-art accuracy on language identification and match the performance of statistical methods for word alignment of parallel sentences without using parallel data."
}
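
The two language-neutrality methods named in the abstract lend themselves to a short sketch. Below is a minimal, hypothetical NumPy illustration of both: unsupervised per-language mean centering, and a linear projection fitted by least squares on embeddings of a small parallel corpus. The function names, argument names, and the least-squares parameterization are assumptions for illustration; the paper's exact procedure (layer choice, pooling, training data) is in the linked PDF.

```python
import numpy as np

def center_by_language(emb, langs):
    """Unsupervised per-language centering (first method in the abstract):
    subtract each language's mean vector so representations become more
    language-neutral. `emb` is an (n, d) array of embeddings, `langs` a
    length-n list of language codes. Names are illustrative."""
    emb = np.asarray(emb, dtype=np.float64)
    out = np.empty_like(emb)
    for lang in set(langs):
        mask = np.fromiter((l == lang for l in langs), dtype=bool)
        # Centroid of this language's embeddings, removed from each vector.
        out[mask] = emb[mask] - emb[mask].mean(axis=0)
    return out

def fit_projection(src, tgt):
    """Explicit projection fitted on small parallel data (second method):
    a least-squares linear map W with src @ W ~ tgt, where `src` and `tgt`
    are (m, d) arrays of embeddings of aligned parallel sentences. The
    paper's exact parameterization is not reproduced here."""
    W, *_ = np.linalg.lstsq(src, tgt, rcond=None)
    return W
```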