@inproceedings{yang-etal-2020-ggp,
title = "{GGP}: Glossary Guided Post-processing for Word Embedding Learning",
author = "Yang, Ruosong and
Cao, Jiannong and
Wen, Zhiyuan",
editor = "Calzolari, Nicoletta and
B{\'e}chet, Fr{\'e}d{\'e}ric and
Blache, Philippe and
Choukri, Khalid and
Cieri, Christopher and
Declerck, Thierry and
Goggi, Sara and
Isahara, Hitoshi and
Maegaard, Bente and
Mariani, Joseph and
Mazo, H{\'e}l{\`e}ne and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Twelfth Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2020.lrec-1.581/",
pages = "4726--4730",
language = "eng",
ISBN = "979-10-95546-34-4",
abstract = "Word embedding learning is the task to map each word into a low-dimensional and continuous vector based on a large corpus. To enhance corpus based word embedding models, researchers utilize domain knowledge to learn more distinguishable representations via joint optimization and post-processing based models. However, joint optimization based models require much training time. Existing post-processing models mostly consider semantic knowledge while learned embedding models show less functional information. Glossary is a comprehensive linguistic resource. And in previous works, the glossary is usually used to enhance the word representations via joint optimization based methods. In this paper, we post-process pre-trained word embedding models with incorporating the glossary and capture more topical and functional information. We propose GGP (Glossary Guided Post-processing word embedding) model which consists of a global post-processing function to fine-tune each word vector, and an auto-encoding model to learn sense representations, furthermore, constrains each post-processed word representation and the composition of its sense representations to be similar. We evaluate our model by comparing it with two state-of-the-art models on six word topical/functional similarity datasets, and the results show that it outperforms competitors by an average of 4.1{\%} across all datasets. And our model outperforms GloVe by more than 7{\%}."
}
Markdown (Informal)
[GGP: Glossary Guided Post-processing for Word Embedding Learning](https://preview.aclanthology.org/jlcl-multiple-ingestion/2020.lrec-1.581/) (Yang et al., LREC 2020)
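The abstract outlines the GGP idea: a global post-processing function that fine-tunes each pre-trained word vector, an auto-encoder over glossary-derived sense representations, and a constraint pulling each post-processed vector toward the composition of its senses. The toy sketch below is not the authors' implementation; it assumes a single shared linear map as the global post-processing function, a one-hidden-layer auto-encoder, mean pooling as the sense composition, and hypothetical loss weights `alpha`/`beta`, purely to illustrate how such an objective could be assembled.

```python
import numpy as np

rng = np.random.default_rng(0)
dim, n_words, n_senses = 8, 5, 2

word_vecs = rng.normal(size=(n_words, dim))             # pre-trained word embeddings
sense_vecs = rng.normal(size=(n_words, n_senses, dim))  # glossary-derived sense vectors (hypothetical)

# Global post-processing function: one shared linear map, initialized near identity.
W = np.eye(dim) + 0.01 * rng.normal(size=(dim, dim))

# One-hidden-layer auto-encoder over the sense vectors (an assumed architecture).
enc = 0.1 * rng.normal(size=(dim, dim))
dec = 0.1 * rng.normal(size=(dim, dim))

def ggp_objective(W, enc, dec, alpha=1.0, beta=0.5):
    """Toy combined objective: stay near the original embeddings, reconstruct
    the sense vectors, and align each post-processed word vector with the
    composition (here: the mean) of its encoded sense representations."""
    post = word_vecs @ W                         # fine-tuned word vectors
    hidden = np.tanh(sense_vecs @ enc)           # encoded sense representations
    recon = hidden @ dec                         # auto-encoder reconstructions
    composed = hidden.mean(axis=1)               # composition of each word's senses
    keep = np.mean((post - word_vecs) ** 2)      # preserve the pre-trained space
    auto = np.mean((recon - sense_vecs) ** 2)    # auto-encoding loss
    align = np.mean((post - composed) ** 2)      # similarity constraint
    return alpha * keep + auto + beta * align

print("toy GGP objective:", ggp_objective(W, enc, dec))
```

In the actual model, the form of the post-processing function, the auto-encoder architecture, and the loss weighting follow the paper rather than these placeholder choices.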