@inproceedings{wang-etal-2022-english,
    title = {{English} Contrastive Learning Can Learn Universal Cross-lingual Sentence Embeddings},
    author = {Wang, Yaushian and
      Wu, Ashley and
      Neubig, Graham},
    editor = {Goldberg, Yoav and
      Kozareva, Zornitsa and
      Zhang, Yue},
    booktitle = {Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing},
    month = dec,
    year = {2022},
    address = {Abu Dhabi, United Arab Emirates},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2022.emnlp-main.621/},
    doi = {10.18653/v1/2022.emnlp-main.621},
    pages = {9122--9133},
    abstract = {Universal cross-lingual sentence embeddings map semantically similar cross-lingual sentences into a shared embedding space. Aligning cross-lingual sentence embeddings usually requires supervised cross-lingual parallel sentences. In this work, we propose mSimCSE, which extends SimCSE to multilingual settings and reveal that contrastive learning on English data can surprisingly learn high-quality universal cross-lingual sentence embeddings without any parallel data. In unsupervised and weakly supervised settings, mSimCSE significantly improves previous sentence embedding methods on cross-lingual retrieval and multilingual STS tasks. The performance of unsupervised mSimCSE is comparable to fully supervised methods in retrieving low-resource languages and multilingual STS. The performance can be further enhanced when cross-lingual NLI data is available.},
}
Markdown (Informal)
[English Contrastive Learning Can Learn Universal Cross-lingual Sentence Embeddings](https://aclanthology.org/2022.emnlp-main.621/) (Wang et al., EMNLP 2022)
ACL