% TACL journal article (ACL Anthology ID Q17-1018).
% The url field uses the canonical ACL Anthology permalink rather than a
% temporary preview/ingest build link; the doi remains the primary identifier.
@article{berend-2017-sparse,
  author    = {Berend, G{\'a}bor},
  title     = {Sparse Coding of Neural Word Embeddings for Multilingual Sequence Labeling},
  editor    = {Lee, Lillian and Johnson, Mark and Toutanova, Kristina},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {5},
  pages     = {247--261},
  year      = {2017},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  doi       = {10.1162/tacl_a_00059},
  url       = {https://aclanthology.org/Q17-1018/},
  abstract  = {In this paper we propose and carefully evaluate a sequence labeling framework which solely utilizes sparse indicator features derived from dense distributed word representations. The proposed model obtains (near) state-of-the art performance for both part-of-speech tagging and named entity recognition for a variety of languages. Our model relies only on a few thousand sparse coding-derived features, without applying any modification of the word representations employed for the different tasks. The proposed model has favorable generalization properties as it retains over 89.8{\%} of its average POS tagging accuracy when trained at 1.2{\%} of the total available training data, i.e. 150 sentences per language.},
}
Markdown (Informal)
[Sparse Coding of Neural Word Embeddings for Multilingual Sequence Labeling](https://aclanthology.org/Q17-1018/) (Berend, TACL 2017)
ACL