% Conference paper in the LREC 2016 proceedings (ACL Anthology ID L16-1239).
% NOTE(review): "address" appears to hold the conference venue rather than the
% publisher's city; kept as exported -- confirm against the target style.
@inproceedings{eger-etal-2016-lemmatization,
    title     = {Lemmatization and Morphological Tagging in {German} and {Latin}: A Comparison and a Survey of the State-of-the-art},
    author    = {Eger, Steffen and
                 Gleim, R{\"u}diger and
                 Mehler, Alexander},
    editor    = {Calzolari, Nicoletta and
                 Choukri, Khalid and
                 Declerck, Thierry and
                 Goggi, Sara and
                 Grobelnik, Marko and
                 Maegaard, Bente and
                 Mariani, Joseph and
                 Mazo, Helene and
                 Moreno, Asuncion and
                 Odijk, Jan and
                 Piperidis, Stelios},
    booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)},
    month     = may,
    year      = {2016},
    address   = {Portoro{\v{z}}, Slovenia},
    publisher = {European Language Resources Association (ELRA)},
    url       = {https://aclanthology.org/L16-1239/},
    pages     = {1507--1513},
    abstract  = {This paper relates to the challenge of morphological tagging and lemmatization in morphologically rich languages by example of German and Latin. We focus on the question what a practitioner can expect when using state-of-the-art solutions out of the box. Moreover, we contrast these with old(er) methods and implementations for POS tagging. We examine to what degree recent efforts in tagger development are reflected by improved accuracies {\textemdash} and at what cost, in terms of training and processing time. We also conduct in-domain vs. out-domain evaluation. Out-domain evaluations are particularly insightful because the distribution of the data which is being tagged by a user will typically differ from the distribution on which the tagger has been trained. Furthermore, two lemmatization techniques are evaluated. Finally, we compare pipeline tagging vs. a tagging approach that acknowledges dependencies between inflectional categories.},
}
Markdown (Informal)
[Lemmatization and Morphological Tagging in German and Latin: A Comparison and a Survey of the State-of-the-art](https://aclanthology.org/L16-1239/) (Eger et al., LREC 2016)
ACL