% Workshop paper, ACL Anthology ID 2025.alp-1.26 (ByT5-based lemmatization of
% cuneiform languages). Maintenance notes:
%   - url is the canonical Anthology page; the upstream export pointed at a
%     temporary preview branch (fix-sig-urls) that is not a stable address.
%   - address is the host city; the upstream export carried the venue room
%     ("The Albuquerque Convention Center, Laguna") instead.
%   - ByT5 is brace-protected as a whole word so sentence-casing styles keep
%     its capitalisation without breaking kerning mid-word.
@inproceedings{lu-etal-2025-lemmatization,
  title     = {Lemmatization of Cuneiform Languages Using the {ByT5} Model},
  author    = {Lu, Pengxiu and
               Huang, Yonglong and
               Xu, Jing and
               Feng, Minxuan and
               Xu, Chao},
  editor    = {Anderson, Adam and
               Gordin, Shai and
               Li, Bin and
               Liu, Yudong and
               Passarotti, Marco C. and
               Sprugnoli, Rachele},
  booktitle = {Proceedings of the Second Workshop on Ancient Language Processing},
  month     = may,
  year      = {2025},
  address   = {Albuquerque, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.alp-1.26/},
  pages     = {197--205},
  isbn      = {979-8-89176-235-0},
  abstract  = {Lemmatization of cuneiform languages presents a unique challenge due to their complex writing system, which combines syllabic and logographic elements. In this study, we investigate the effectiveness of the ByT5 model in addressing this challenge by developing and evaluating a ByT5-based lemmatization system. Experimental results demonstrate that ByT5 outperforms mT5 in this task, achieving an accuracy of 80.55{\%} on raw lemmas and 82.59{\%} on generalized lemmas, where sense numbers are removed. These findings highlight the potential of ByT5 for lemmatizing cuneiform languages and provide useful insights for future work on ancient text lemmatization.},
}
Markdown (Informal)
[Lemmatization of Cuneiform Languages Using the ByT5 Model](https://aclanthology.org/2025.alp-1.26/) (Lu et al., ALP 2025)
ACL
- Pengxiu Lu, Yonglong Huang, Jing Xu, Minxuan Feng, and Chao Xu. 2025. Lemmatization of Cuneiform Languages Using the ByT5 Model. In Proceedings of the Second Workshop on Ancient Language Processing, pages 197–205, Albuquerque, New Mexico, USA. Association for Computational Linguistics.