% Schwenk and Douze (2017): paper in the proceedings of the 2nd Workshop on
% Representation Learning for NLP (ACL Anthology ID W17-2619).
% The url field uses the canonical ACL Anthology address, not a preview-branch
% deployment, so that the link stays stable.
@inproceedings{schwenk-douze-2017-learning,
  title     = {Learning Joint Multilingual Sentence Representations with Neural Machine Translation},
  author    = {Schwenk, Holger and
               Douze, Matthijs},
  editor    = {Blunsom, Phil and
               Bordes, Antoine and
               Cho, Kyunghyun and
               Cohen, Shay and
               Dyer, Chris and
               Grefenstette, Edward and
               Hermann, Karl Moritz and
               Rimell, Laura and
               Weston, Jason and
               Yih, Scott},
  booktitle = {Proceedings of the 2nd Workshop on Representation Learning for {NLP}},
  month     = aug,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/W17-2619/},
  doi       = {10.18653/v1/W17-2619},
  pages     = {157--167},
  abstract  = {In this paper, we use the framework of neural machine translation to learn joint sentence representations across six very different languages. Our aim is that a representation which is independent of the language, is likely to capture the underlying semantics. We define a new cross-lingual similarity measure, compare up to 1.4M sentence representations and study the characteristics of close sentences. We provide experimental evidence that sentences that are close in embedding space are indeed semantically highly related, but often have quite different structure and syntax. These relations also hold when comparing sentences in different languages.}
}
Markdown (Informal)
[Learning Joint Multilingual Sentence Representations with Neural Machine Translation](https://aclanthology.org/W17-2619/) (Schwenk & Douze, RepL4NLP 2017)
ACL