% Conference paper in the NoDaLiDa 2023 proceedings (ACL Anthology ID 2023.nodalida-1.28).
% The url field uses the permanent ACL Anthology address rather than a preview-build link.
% Proper nouns in the title are brace-protected as whole words so styles that
% sentence-case titles keep their capitalisation.
@inproceedings{dorkin-sirts-2023-comparison,
  title     = {Comparison of Current Approaches to Lemmatization: A Case Study in {Estonian}},
  author    = {Dorkin, Aleksei and Sirts, Kairit},
  editor    = {Alum{\"a}e, Tanel and Fishel, Mark},
  booktitle = {Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)},
  month     = may,
  year      = {2023},
  address   = {T{\'o}rshavn, Faroe Islands},
  publisher = {University of Tartu Library},
  url       = {https://aclanthology.org/2023.nodalida-1.28/},
  pages     = {280--285},
  abstract  = {This study evaluates three different lemmatization approaches to Estonian{---}Generative character-level models, Pattern-based word-level classification models, and rule-based morphological analysis. According to our experiments, a significantly smaller Generative model consistently outperforms the Pattern-based classification model based on EstBERT. Additionally, we observe a relatively small overlap in errors made by all three models, indicating that an ensemble of different approach could lead to improvements.},
}
Markdown (Informal)
[Comparison of Current Approaches to Lemmatization: A Case Study in Estonian](https://aclanthology.org/2023.nodalida-1.28/) (Dorkin & Sirts, NoDaLiDa 2023)
ACL