% Workshop paper by Hamalainen and Wiechetek (2020) in the joint SLTU/CCURL proceedings.
% The url field uses the canonical ACL Anthology address for paper ID 2020.sltu-1.5.
@inproceedings{hamalainen-wiechetek-2020-morphological,
  author    = {H{\"a}m{\"a}l{\"a}inen, Mika and
               Wiechetek, Linda},
  title     = {Morphological Disambiguation of {South} {S{\'a}mi} with {FSTs} and Neural Networks},
  editor    = {Beermann, Dorothee and
               Besacier, Laurent and
               Sakti, Sakriani and
               Soria, Claudia},
  booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages ({SLTU}) and Collaboration and Computing for Under-Resourced Languages ({CCURL})},
  month     = may,
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  pages     = {36--40},
  isbn      = {979-10-95546-35-1},
  language  = {eng},
  url       = {https://aclanthology.org/2020.sltu-1.5/},
  abstract  = {We present a method for conducting morphological disambiguation for South S{\'a}mi, which is an endangered language. Our method uses an FST-based morphological analyzer to produce an ambiguous set of morphological readings for each word in a sentence. These readings are disambiguated with a Bi-RNN model trained on the related North S{\'a}mi UD Treebank and some synthetically generated South S{\'a}mi data. The disambiguation is done on the level of morphological tags ignoring word forms and lemmas; this makes it possible to use North S{\'a}mi training data for South S{\'a}mi without the need for a bilingual dictionary or aligned word embeddings. Our approach requires only minimal resources for South S{\'a}mi, which makes it usable and applicable in the contexts of any other endangered language as well.},
}
Markdown (Informal)
[Morphological Disambiguation of South Sámi with FSTs and Neural Networks](https://preview.aclanthology.org/add-emnlp-2024-awards/2020.sltu-1.5/) (Hämäläinen & Wiechetek, SLTU 2020)
ACL
- Mika Hämäläinen and Linda Wiechetek. 2020. Morphological Disambiguation of South Sámi with FSTs and Neural Networks. In Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL), pages 36–40, Marseille, France. European Language Resources Association.