% Conference paper from the LREC 2026 main volume (ACL Anthology ID 2026.lrec-main.255).
% Typed as @inproceedings so that booktitle/editor/publisher are rendered; the Anthology
% volume id ("main") is kept in the non-standard, style-ignored field `anthology-volume`.
% NOTE(review): `url` points at a preview/ingest host -- confirm the permanent Anthology URL.
% NOTE(review): `booktitle` carries the venue name as exported; the full proceedings title
% is not shown in this record -- confirm against the published volume.
@inproceedings{lobzhanidze-etal-2026-megrelian,
  author           = {Lobzhanidze, Irina and
                      Gersamia, Rusudan and
                      Gogia, Tamar},
  title            = {The {Megrelian} Language Corpus ({MLC}): Creation, Annotation, and Initial Steps toward a {UD} Treebank},
  booktitle        = {International Conference on Language Resources and Evaluation},
  editor           = {Piperidis, Stelios and
                      Bel, N{\'u}ria and
                      van den Heuvel, Henk and
                      Ide, Nancy and
                      Krek, Simon and
                      Toral, Antonio},
  month            = may,
  year             = {2026},
  address          = {Palma de Mallorca, Spain},
  publisher        = {ELRA Language Resource Association},
  pages            = {3250--3256},
  url              = {https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.255/},
  anthology-volume = {main},
  abstract         = {This paper presents the development of the Megrelian Language Corpus (MLC), a new language resource for the documentation and computational analysis of Megrelian, an endangered Kartvelian language. The corpus is based on fieldwork conducted in Samegrelo, Georgia (2022{--}2024) and currently contains 97,691 tokens and 60,959 types. The data were transcribed using the International Phonetic Alphabet (IPA) and annotated in Fieldworks Language Explorer (FLEx) with segmentation, morphological analysis and bilingual Georgian-English translations. Each text is accessible through a specially designed web interface, providing multiple tiers of annotation and integrated search functions. The paper describes the corpus design, annotation methodology and challenges encountered in representing Megrelian{'}s complex agglutinative morphology. It also outlines initial steps toward converting existing data into the Universal Dependencies (UD) framework, building on experience from related Kartvelian languages such as Georgian. The MLC corpus represents the first publicly available linguistic resource for Megrelian and provides a foundation for future UD treebank development.},
}
Markdown (Informal)
[The Megrelian Language Corpus (MLC): Creation, Annotation, and Initial Steps toward a UD Treebank](https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.255/) (Lobzhanidze et al., LREC 2026)
ACL