% RANLP 2019 conference paper (ACL Anthology ID R19-1133).
% The url field uses the canonical Anthology address rather than a temporary
% staging-preview build, and the doi is stored bare (no resolver prefix).
@inproceedings{steingrimsson-etal-2019-augmenting,
  title     = {Augmenting a {BiLSTM} Tagger with a Morphological Lexicon and a Lexical Category Identification Step},
  author    = {Steingr{\'i}msson, Stein{\th}{\'o}r and
               K{\'a}rason, {\"O}rvar and
               Loftsson, Hrafn},
  editor    = {Mitkov, Ruslan and
               Angelova, Galia},
  booktitle = {Proceedings of the International Conference on Recent Advances in Natural Language Processing ({RANLP} 2019)},
  month     = sep,
  year      = {2019},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd.},
  url       = {https://aclanthology.org/R19-1133/},
  doi       = {10.26615/978-954-452-056-4_133},
  pages     = {1161--1168},
  abstract  = {Previous work on using BiLSTM models for PoS tagging has primarily focused on small tagsets. We evaluate BiLSTM models for tagging Icelandic, a morphologically rich language, using a relatively large tagset. Our baseline BiLSTM model achieves higher accuracy than any other previously published tagger, when not taking advantage of a morphological lexicon. When we extend the model by incorporating such data, we outperform the earlier state-of-the-art results by a significant margin. We also report on work in progress that attempts to address the problem of data sparsity inherent to morphologically detailed, fine-grained tagsets. We experiment with training a separate model on only the lexical category and using the coarse-grained output tag as an input into the main model. This method further increases the accuracy and reduces the tagging errors by 21.3{\%} compared to previous state-of-the-art results. Finally, we train and test our tagger on a new gold standard for Icelandic.},
}
Markdown (Informal)
[Augmenting a BiLSTM Tagger with a Morphological Lexicon and a Lexical Category Identification Step](https://preview.aclanthology.org/jlcl-multiple-ingestion/R19-1133/) (Steingrímsson et al., RANLP 2019)
ACL