% Wrobel and Nowak, LT4HALA 2022: system paper for the EvaLatin 2022 shared task
% (part-of-speech tagging, morphological tagging and lemmatization for Latin).
% The url field uses the canonical ACL Anthology address instead of a preview build.
@inproceedings{wrobel-nowak-2022-transformer,
  title     = {Transformer-based Part-of-Speech Tagging and Lemmatization for {Latin}},
  author    = {Wr{\'o}bel, Krzysztof and
               Nowak, Krzysztof},
  editor    = {Sprugnoli, Rachele and
               Passarotti, Marco},
  booktitle = {Proceedings of the Second Workshop on Language Technologies for Historical and Ancient Languages},
  month     = jun,
  year      = {2022},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  url       = {https://aclanthology.org/2022.lt4hala-1.31/},
  pages     = {193--197},
  abstract  = {The paper presents a submission to the EvaLatin 2022 shared task. Our system places first for lemmatization, part-of-speech and morphological tagging in both closed and open modalities. The results for cross-genre and cross-time sub-tasks show that the system handles the diachronic and diastratic variation of Latin. The architecture employs state-of-the-art transformer models. For part-of-speech and morphological tagging, we use XLM-RoBERTa large, while for lemmatization a ByT5 small model was employed. The paper features a thorough discussion of part-of-speech and lemmatization errors which shows how the system performance may be improved for Classical, Medieval and Neo-Latin texts.},
}
Markdown (Informal)
[Transformer-based Part-of-Speech Tagging and Lemmatization for Latin](https://aclanthology.org/2022.lt4hala-1.31/) (Wróbel & Nowak, LT4HALA 2022)
ACL