@comment{ACL Anthology W17-1410: BSNLP 2017 workshop paper on tagging non-standard Slovene text.}
@inproceedings{ljubesic-etal-2017-adapting,
    title     = {Adapting a State-of-the-Art Tagger for {South} {Slavic} Languages to Non-Standard Text},
    author    = {Ljube{\v{s}}i{\'c}, Nikola and
                 Erjavec, Toma{\v{z}} and
                 Fi{\v{s}}er, Darja},
    editor    = {Erjavec, Toma{\v{z}} and
                 Piskorski, Jakub and
                 Pivovarova, Lidia and
                 {\v{S}}najder, Jan and
                 Steinberger, Josef and
                 Yangarber, Roman},
    booktitle = {Proceedings of the 6th Workshop on {Balto-Slavic} Natural Language Processing},
    month     = apr,
    year      = {2017},
    address   = {Valencia, Spain},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/W17-1410/},
    doi       = {10.18653/v1/W17-1410},
    pages     = {60--68},
    abstract  = {In this paper we present the adaptations of a state-of-the-art tagger for South Slavic languages to non-standard texts on the example of the Slovene language. We investigate the impact of introducing in-domain training data as well as additional supervision through external resources or tools like word clusters and word normalization. We remove more than half of the error of the standard tagger when applied to non-standard texts by training it on a combination of standard and non-standard training data, while enriching the data representation with external resources removes additional 11 percent of the error. The final configuration achieves tagging accuracy of 87.41{\%} on the full morphosyntactic description, which is, nevertheless, still quite far from the accuracy of 94.27{\%} achieved on standard text.},
}
Markdown (Informal)
[Adapting a State-of-the-Art Tagger for South Slavic Languages to Non-Standard Text](https://aclanthology.org/W17-1410/) (Ljubešić et al., BSNLP 2017)
ACL