@inproceedings{li-etal-2018-subword,
    title     = {Subword-level Composition Functions for Learning Word Embeddings},
    author    = {Li, Bofang and
                 Drozd, Aleksandr and
                 Liu, Tao and
                 Du, Xiaoyong},
    editor    = {Faruqui, Manaal and
                 Sch{\"u}tze, Hinrich and
                 Trancoso, Isabel and
                 Tsvetkov, Yulia and
                 Yaghoobzadeh, Yadollah},
    booktitle = {Proceedings of the Second Workshop on Subword/Character {LE}vel Models},
    month     = jun,
    year      = {2018},
    address   = {New Orleans},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/W18-1205/},
    doi       = {10.18653/v1/W18-1205},
    pages     = {38--48},
    abstract  = {Subword-level information is crucial for capturing the meaning and morphology of words, especially for out-of-vocabulary entries. We propose CNN- and RNN-based subword-level composition functions for learning word embeddings, and systematically compare them with popular word-level and subword-level models (Skip-Gram and FastText). Additionally, we propose a hybrid training scheme in which a pure subword-level model is trained jointly with a conventional word-level embedding model based on lookup-tables. This increases the fitness of all types of subword-level word embeddings; the word-level embeddings can be discarded after training, leaving only compact subword-level representation with much smaller data volume. We evaluate these embeddings on a set of intrinsic and extrinsic tasks, showing that subword-level models have advantage on tasks related to morphology and datasets with high OOV rate, and can be combined with other types of embeddings.},
}
Markdown (Informal)
[Subword-level Composition Functions for Learning Word Embeddings](https://aclanthology.org/W18-1205/) (Li et al., SCLeM 2018)
ACL