% Workshop paper from AmericasNLP 2021 (ACL Anthology ID 2021.americasnlp-1.6).
% The language name is spelled with two apostrophes (K'iche'); a backtick
% would typeset an opening quote instead.
% NOTE(review): the editor "Ruiz, Ivan Vladimir Meza" may have a compound
% surname ("Meza Ruiz, Ivan Vladimir") -- confirm against the proceedings
% front matter before changing.
@inproceedings{tyers-howell-2021-survey,
  title     = {A survey of part-of-speech tagging approaches applied to {K'iche'}},
  author    = {Tyers, Francis and
               Howell, Nick},
  editor    = {Mager, Manuel and
               Oncevay, Arturo and
               Rios, Annette and
               Ruiz, Ivan Vladimir Meza and
               Palmer, Alexis and
               Neubig, Graham and
               Kann, Katharina},
  booktitle = {Proceedings of the First Workshop on Natural Language Processing for Indigenous Languages of the Americas},
  month     = jun,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.americasnlp-1.6/},
  doi       = {10.18653/v1/2021.americasnlp-1.6},
  pages     = {44--52},
  abstract  = {We study the performance of several popular neural part-of-speech taggers from the Universal Dependencies ecosystem on Mayan languages using a small corpus of 1435 annotated K'iche' sentences consisting of approximately 10,000 tokens, with encouraging results: $F_1$ scores 93{\%}+ on lemmatisation, part-of-speech and morphological feature assignment. The high performance motivates a cross-language part-of-speech tagging study, where K'iche'-trained models are evaluated on two other Mayan languages, Kaqchikel and Uspanteko: performance on Kaqchikel is good, 63--85{\%}, and on Uspanteko modest, 60--71{\%}. Supporting experiments lead us to conclude the relative diversity of morphological features as a plausible explanation for the limiting factors in cross-language tagging performance, providing some direction for future sentence annotation and collection work to support these and other Mayan languages.},
}
Markdown (Informal)
[A survey of part-of-speech tagging approaches applied to K’iche’](https://preview.aclanthology.org/jlcl-multiple-ingestion/2021.americasnlp-1.6/) (Tyers & Howell, AmericasNLP 2021)
ACL