% EMNLP 2021 main-conference paper; ACL Anthology ID 2021.emnlp-main.376.
@inproceedings{limisiewicz-marecek-2021-examining,
    title = "Examining Cross-lingual Contextual Embeddings with Orthogonal Structural Probes",
    author = "Limisiewicz, Tomasz and
      Mare{\v{c}}ek, David",
    editor = "Moens, Marie-Francine and
      Huang, Xuanjing and
      Specia, Lucia and
      Yih, Scott Wen-tau",
    booktitle = "Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing",
    month = nov,
    year = "2021",
    address = "Online and Punta Cana, Dominican Republic",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2021.emnlp-main.376/",
    doi = "10.18653/v1/2021.emnlp-main.376",
    pages = "4589--4598",
    abstract = "State-of-the-art contextual embeddings are obtained from large language models available only for a few languages. For others, we need to learn representations using a multilingual model. There is an ongoing debate on whether multilingual embeddings can be aligned in a space shared across many languages. The novel Orthogonal Structural Probe (Limisiewicz and Mare{\v{c}}ek, 2021) allows us to answer this question for specific linguistic features and learn a projection based only on mono-lingual annotated datasets. We evaluate syntactic (UD) and lexical (WordNet) structural information encoded in mBERT's contextual representations for nine diverse languages. We observe that for languages closely related to English, no transformation is needed. The evaluated information is encoded in a shared cross-lingual embedding space. For other languages, it is beneficial to apply orthogonal transformation learned separately for each language. We successfully apply our findings to zero-shot and few-shot cross-lingual parsing."
}
Markdown (Informal)
[Examining Cross-lingual Contextual Embeddings with Orthogonal Structural Probes](https://aclanthology.org/2021.emnlp-main.376/) (Limisiewicz & Mareček, EMNLP 2021)
ACL