% Elsner and Liu (2025): SIGMORPHON 2025 workshop paper on word-by-word LLM
% prompting for interlinear glossing of low-resource languages.
% Cleaned from an automated export: the colon in the title and in the abstract
% had been mangled into '':'' and the booktitle read "of the The".
% NOTE(review): the url points at a preview-branch build (fix-sig-urls) --
% confirm whether the canonical Anthology address should be used instead.
% NOTE(review): the first four sentences of the abstract (VeLePa / Central
% Pame lexicon) do not match this title and look like text from a different
% paper -- verify against the published abstract before trimming.
@inproceedings{elsner-liu-2025-prompt,
  title     = {Prompt and circumstance: A word-by-word {LLM} prompting approach to interlinear glossing for low-resource languages},
  author    = {Elsner, Micha and
               Liu, David},
  editor    = {Nicolai, Garrett and
               Chodroff, Eleanor and
               Mailhot, Frederic and
               {\c{C}}{\"o}ltekin, {\c{C}}a{\u{g}}r{\i}},
  booktitle = {Proceedings of the 22nd {SIGMORPHON} workshop on Computational Morphology, Phonology, and Phonetics},
  month     = may,
  year      = {2025},
  address   = {Albuquerque, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/fix-sig-urls/2025.sigmorphon-main.1/},
  pages     = {1--14},
  isbn      = {979-8-89176-231-2},
  abstract  = {This paper presents VeLePa, an inflected verbal lexicon of Central Pame (pbs, cent2154), an Otomanguean language from Mexico. This resource contains 12528 words in phonological form representing the complete inflectional paradigms of 216 verbs, supplemented with use frequencies. Computer-operable (CLDF) inflected lexicons of non-WEIRD underresourced languages are urgently needed to expand digital capacities in this languages (e.g. in NLP). VeLePa contributes to this, and does so with data from a language which is morphologically extraordinary, with unusually high levels of irregularity and multiple conjugations at various loci within the word: prefixes, stems, tone, and suffixes constitute different albeit interrelated subsystems of inflection. Partly automated creation of interlinear glossed text (IGT) has the potential to assist in linguistic documentation. We argue that LLMs can make this process more accessible to linguists because of their capacity to follow natural-language instructions. We investigate the effectiveness of a retrieval-based LLM prompting approach to glossing, applied to the seven languages from the SIGMORPHON 2023 shared task. Our system beats the BERT-based shared task baseline for every language in the morpheme-level score category, and we show that a simple 3-best oracle has higher word-level scores than the challenge winner (a tuned sequence model) in five languages. In a case study on Tsez, we ask the LLM to automatically create and follow linguistic instructions, reducing errors on a confusing grammatical feature. Our results thus demonstrate the potential contributions which LLMs can make in interactive systems for glossing, both in making suggestions to human annotators and following directions.},
}
Markdown (Informal)
[Prompt and circumstance: A word-by-word LLM prompting approach to interlinear glossing for low-resource languages](https://preview.aclanthology.org/fix-sig-urls/2025.sigmorphon-main.1/) (Elsner & Liu, SIGMORPHON 2025)
ACL