@inproceedings{gordin-etal-2025-evacun,
title = "{E}va{C}un 2025 Shared Task: Lemmatization and Token Prediction in {A}kkadian and {S}umerian using {LLM}s",
author = "Gordin, Shai and
Sahala, Aleksi and
Spencer, Shahar and
Klein, Stav",
editor = "Anderson, Adam and
Gordin, Shai and
Li, Bin and
Liu, Yudong and
Passarotti, Marco C. and
Sprugnoli, Rachele",
booktitle = "Proceedings of the Second Workshop on Ancient Language Processing",
month = may,
year = "2025",
address = "The Albuquerque Convention Center, Laguna",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/corrections-2025-06/2025.alp-1.33/",
doi = "10.18653/v1/2025.alp-1.33",
pages = "242--250",
ISBN = "979-8-89176-235-0",
abstract = "The EvaCun 2025 Shared Task, organized as part of ALP 2025 workshop and co-located with NAACL 2025, explores how Large Language Models (LLMs) and transformer-based models can be used to improve lemmatization and token prediction tasks for low-resource ancient cuneiform texts. This year our datasets focused on the best attested ancient Near Eastern languages written in cuneiform, namely, Akkadian and Sumerian texts. However, we utilized the availability of datasets never before used on scale in NLP tasks, primarily first millennium literature (i.e. ``Canonical'') provided by the Electronic Babylonian Library (eBL), and Old Babylonian letters and archival texts, provided by Archibab. We aim to encourage the development of new computational methods to better analyze and reconstruct cuneiform inscriptions, pushing NLP forward for ancient and low-resource languages. Three teams competed for the lemmatization subtask and one for the token prediction subtask. Each subtask was evaluated alongside a baseline model, provided by the organizers."
}
Markdown (Informal):
[EvaCun 2025 Shared Task: Lemmatization and Token Prediction in Akkadian and Sumerian using LLMs](https://aclanthology.org/2025.alp-1.33/) (Gordin et al., ALP 2025)