@inproceedings{di-gangi-federico-2017-monolingual,
  title     = {Monolingual Embeddings for Low Resourced Neural Machine Translation},
  author    = {Di Gangi, Mattia Antonino and
               Federico, Marcello},
  editor    = {Sakti, Sakriani and
               Utiyama, Masao},
  booktitle = {Proceedings of the 14th International Conference on Spoken Language Translation},
  month     = dec # " 14--15",
  year      = {2017},
  address   = {Tokyo, Japan},
  publisher = {International Workshop on Spoken Language Translation},
  url       = {https://preview.aclanthology.org/jlcl-multiple-ingestion/2017.iwslt-1.14/},
  pages     = {97--104},
  abstract  = {Neural machine translation (NMT) is the state of the art for machine translation, and it shows the best performance when there is a considerable amount of data available. When only little data exist for a language pair, the model cannot produce good representations for words, particularly for rare words. One common solution consists in reducing data sparsity by segmenting words into sub-words, in order to allow rare words to have shared representations with other words. Taking a different approach, in this paper we present a method to feed an NMT network with word embeddings trained on monolingual data, which are combined with the task-specific embeddings learned at training time. This method can leverage an embedding matrix with a huge number of words, which can therefore extend the word-level vocabulary. Our experiments on two language pairs show good results for the typical low-resourced data scenario (IWSLT in-domain dataset). Our consistent improvements over the baselines represent a positive proof about the possibility to leverage models pre-trained on monolingual data in NMT.},
}
Markdown (Informal)
[Monolingual Embeddings for Low Resourced Neural Machine Translation](https://preview.aclanthology.org/jlcl-multiple-ingestion/2017.iwslt-1.14/) (Di Gangi & Federico, IWSLT 2017)
ACL