% Pugh and Tyers, VarDial 2024 workshop paper (ACL Anthology ID 2024.vardial-1.12).
% The url field uses the canonical Anthology permalink; the DOI is the stable identifier.
@inproceedings{pugh-tyers-2024-experiments,
  title     = {Experiments in Multi-Variant Natural Language Processing for {Nahuatl}},
  author    = {Pugh, Robert and
               Tyers, Francis},
  editor    = {Scherrer, Yves and
               Jauhiainen, Tommi and
               Ljube{\v{s}}i{\'c}, Nikola and
               Zampieri, Marcos and
               Nakov, Preslav and
               Tiedemann, J{\"o}rg},
  booktitle = {Proceedings of the Eleventh Workshop on NLP for Similar Languages, Varieties, and Dialects (VarDial 2024)},
  month     = jun,
  year      = {2024},
  address   = {Mexico City, Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.vardial-1.12/},
  doi       = {10.18653/v1/2024.vardial-1.12},
  pages     = {140--151},
  abstract  = {Linguistic variation is a complicating factor for digital language technologies. This is particularly true for languages that lack an official {\textquotedblleft}standard{\textquotedblright} variety, including many regional and minoritized languages. In this paper, we describe a set of experiments focused on multivariant natural language processing for the Nahuatl, an indigenous Mexican language with a high level of linguistic variation and no single recognized standard variant. Using small (10k tokens), recently-published annotated datasets for two Nahuatl variants, we compare the performance of single-variant, cross-variant, and joint training, and explore how different models perform on a third Nahuatl variant, unseen in training. These results and the subsequent discussion contribute to efforts of developing low-resource NLP that is robust to diatopic variation. We share all code used to process the data and run the experiments.},
}
Markdown (Informal)
[Experiments in Multi-Variant Natural Language Processing for Nahuatl](https://aclanthology.org/2024.vardial-1.12/) (Pugh & Tyers, VarDial 2024)
ACL