% Workshop paper from the Proceedings of the Ancient Language Processing Workshop (2023).
% The url field uses the canonical ACL Anthology address, not a preview/staging build.
@inproceedings{sahala-linden-2023-neural,
    title = "A Neural Pipeline for {POS}-tagging and Lemmatizing Cuneiform Languages",
    author = "Sahala, Aleksi and
      Lind{\'e}n, Krister",
    editor = "Anderson, Adam and
      Gordin, Shai and
      Li, Bin and
      Liu, Yudong and
      Passarotti, Marco C.",
    booktitle = "Proceedings of the Ancient Language Processing Workshop",
    month = sep,
    year = "2023",
    address = "Varna, Bulgaria",
    publisher = "INCOMA Ltd., Shoumen, Bulgaria",
    url = "https://aclanthology.org/2023.alp-1.23/",
    pages = "203--212",
    abstract = "We presented a pipeline for POS-tagging and lemmatizing cuneiform languages and evaluated its performance on Sumerian, first millennium Babylonian, Neo-Assyrian and Urartian texts extracted from Oracc. The system achieves a POS-tagging accuracy between 95-98{\%} and a lemmatization accuracy of 94-96{\%} depending on the language or dialect. For OOV words only, the current version can predict correct POS-tags for 83-91{\%}, and lemmata for 68-84{\%} of the input words. Compared with the earlier version, the current one has about 10{\%} higher accuracy in OOV lemmatization and POS-tagging due to better neural network performance. We also tested the system for lemmatizing and POS-tagging the PROIEL Ancient Greek and Latin treebanks, achieving results similar to those with the cuneiform languages."
}
Markdown (Informal)
[A Neural Pipeline for POS-tagging and Lemmatizing Cuneiform Languages](https://aclanthology.org/2023.alp-1.23/) (Sahala & Lindén, ALP 2023)
ACL