% Conference paper from the ICON 2020 proceedings, as listed in the ACL Anthology.
% Case-sensitive words in the title are brace-protected as whole words so that
% sentence-casing styles keep them intact without affecting kerning/hyphenation.
% NOTE(review): url uses the canonical Anthology address instead of the preview
% build (jlcl-multiple-ingestion) this record was exported from -- confirm it resolves.
% NOTE(review): address carries the conference venue, as in the original export.
@inproceedings{hafsteinsson-ingason-2020-developing,
  title     = {Developing a {Faroese} {PoS}-tagging solution using {Icelandic} methods},
  author    = {Hafsteinsson, Hinrik and
               Ingason, Anton Karl},
  editor    = {Bhattacharyya, Pushpak and
               Sharma, Dipti Misra and
               Sangal, Rajeev},
  booktitle = {Proceedings of the 17th International Conference on Natural Language Processing (ICON)},
  month     = dec,
  year      = {2020},
  address   = {Indian Institute of Technology Patna, Patna, India},
  publisher = {NLP Association of India (NLPAI)},
  url       = {https://aclanthology.org/2020.icon-main.65/},
  pages     = {481--490},
  abstract  = {We describe the development of a dedicated, high-accuracy part-of-speech (PoS) tagging solution for Faroese, a North Germanic language with about 50,000 speakers. To achieve this, a state-of-the-art neural PoS tagger for Icelandic, ABLTagger, was trained on a 100,000 word PoS-tagged corpus for Faroese, standardised with methods previously applied to Icelandic corpora. This tagger was supplemented with a novel Experimental Database of Faroese Inflection (EDFM), which contains morphological information on 67,488 Faroese words with about one million inflectional forms. This approach produced a PoS-tagging model for Faroese which achieves a 91.40{\%} overall accuracy when evaluated with 10-fold cross validation, which is currently the highest reported accuracy for a dedicated Faroese PoS-tagger. The tagging model, morphological database, proposed revised PoS tagset for Faroese as well as a revised and standardised PoS tagged corpus are all presented as products of this project and are made available for use in further research in Faroese language technology.},
}
Markdown (Informal)
[Developing a Faroese PoS-tagging solution using Icelandic methods](https://preview.aclanthology.org/jlcl-multiple-ingestion/2020.icon-main.65/) (Hafsteinsson & Ingason, ICON 2020)
ACL