@comment{EMNLP 2017 tutorial abstract; ACL Anthology ID D17-3007.}
@inproceedings{faruqui-etal-2017-cross,
  title     = {Cross-Lingual Word Representations: Induction and Evaluation},
  author    = {Faruqui, Manaal and
               S{\o}gaard, Anders and
               Vuli{\'c}, Ivan},
  editor    = {Birch, Alexandra and
               Schneider, Nathan},
  booktitle = {Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing: Tutorial Abstracts},
  month     = sep,
  year      = {2017},
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/D17-3007/},
  abstract  = {In recent past, NLP as a field has seen tremendous utility of distributional word vector representations as features in downstream tasks. The fact that these word vectors can be trained on unlabeled monolingual corpora of a language makes them an inexpensive resource in NLP. With the increasing use of monolingual word vectors, there is a need for word vectors that can be used as efficiently across multiple languages as monolingually. Therefore, learning bilingual and multilingual word embeddings/vectors is currently an important research topic. These vectors offer an elegant and language-pair independent way to represent content across different languages. This tutorial aims to bring NLP researchers up to speed with the current techniques in cross-lingual word representation learning. We will first discuss how to induce cross-lingual word representations (covering both bilingual and multilingual ones) from various data types and resources (e.g., parallel data, comparable data, non-aligned monolingual data in different languages, dictionaries and thesauri, or, even, images, eye-tracking data). We will then discuss how to evaluate such representations, intrinsically and extrinsically. We will introduce researchers to state-of-the-art methods for constructing cross-lingual word representations and discuss their applicability in a broad range of downstream NLP applications. We will deliver a detailed survey of the current methods, discuss best training and evaluation practices and use-cases, and provide links to publicly available implementations, datasets, and pre-trained models.},
}
Markdown (Informal)
[Cross-Lingual Word Representations: Induction and Evaluation](https://aclanthology.org/D17-3007/) (Faruqui et al., EMNLP 2017)
ACL