@comment{LREC 2020 paper. The url field uses the canonical ACL Anthology address; the exported entry pointed at a preview-branch build (preview.aclanthology.org/fix-sig-urls).}
@inproceedings{hakimi-parizi-cook-2020-evaluating,
  title     = {Evaluating Sub-word Embeddings in Cross-lingual Models},
  author    = {Hakimi Parizi, Ali and
               Cook, Paul},
  editor    = {Calzolari, Nicoletta and
               B{\'e}chet, Fr{\'e}d{\'e}ric and
               Blache, Philippe and
               Choukri, Khalid and
               Cieri, Christopher and
               Declerck, Thierry and
               Goggi, Sara and
               Isahara, Hitoshi and
               Maegaard, Bente and
               Mariani, Joseph and
               Mazo, H{\'e}l{\`e}ne and
               Moreno, Asuncion and
               Odijk, Jan and
               Piperidis, Stelios},
  booktitle = {Proceedings of the Twelfth Language Resources and Evaluation Conference},
  month     = may,
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  url       = {https://aclanthology.org/2020.lrec-1.330/},
  pages     = {2712--2719},
  language  = {eng},
  isbn      = {979-10-95546-34-4},
  abstract  = {Cross-lingual word embeddings create a shared space for embeddings in two languages, and enable knowledge to be transferred between languages for tasks such as bilingual lexicon induction. One problem, however, is out-of-vocabulary (OOV) words, for which no embeddings are available. This is particularly problematic for low-resource and morphologically-rich languages, which often have relatively high OOV rates. Approaches to learning sub-word embeddings have been proposed to address the problem of OOV words, but most prior work has not considered sub-word embeddings in cross-lingual models. In this paper, we consider whether sub-word embeddings can be leveraged to form cross-lingual embeddings for OOV words. Specifically, we consider a novel bilingual lexicon induction task focused on OOV words, for language pairs covering several language families. Our results indicate that cross-lingual representations for OOV words can indeed be formed from sub-word embeddings, including in the case of a truly low-resource morphologically-rich language.},
}
Markdown (Informal)
[Evaluating Sub-word Embeddings in Cross-lingual Models](https://aclanthology.org/2020.lrec-1.330/) (Hakimi Parizi & Cook, LREC 2020)
ACL