@inproceedings{robertson-2026-evaluating,
    title     = {Evaluating Frontier {LLM} Translation Capability for {Lakota}},
    author    = {Robertson, Lance},
    editor    = {Mager, Manuel and
                 Ebrahimi, Abteen and
                 Bui, Minh Duc and
                 Pugh, Robert and
                 Oncevay, Arturo and
                 Chiruzzo, Luis and
                 Coto-Solano, Rolando and
                 Rijhwani, Shruti and
                 von der Wense, Katharina},
    booktitle = {Proceedings of the Sixth Workshop on {NLP} for Indigenous Languages of the {Americas} ({AmericasNLP})},
    month     = jul,
    year      = {2026},
    address   = {San Diego, California, USA},
    publisher = {Association for Computational Linguistics},
    url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.americasnlp-6.2/},
    pages     = {11--21},
    isbn      = {979-8-89176-415-6},
    abstract  = {We evaluate seven large language models{---}four proprietary and three open-weight{---}on bidirectional Lakota{--}English translation using 200 sentence pairs from the New Lakota Dictionary. Each model is evaluated with and without extended reasoning, where the provider{'}s API permits. The best model (Gemini 3.1 Pro) achieves a mean chrF++ of 59.4 on Lakota{\textrightarrow}English and 42.6 on English{\textrightarrow}Lakota; the strongest open-weight model trails the proprietary leaders, and no model produces reliable translation in either direction. Two independent LLM judges from different model families agree substantially (Cohen{'}s {\ensuremath{\kappa}}=0.75) that semantic equivalence ranges from 6{\%} (GPT-5.2) to 60{\%} (Gemini), diverging substantially from chrF++ scores. For the open-weight models, enabling reasoning changes refusal behavior far more than translation quality: it surfaces the limitation rather than overcoming it. Diacritic-normalization analysis shows models produce roughly correct base characters but place diacritical marks inconsistently. All results and evaluation code are publicly available at https://github.com/robotson/lakota-translation-benchmark.},
}
Markdown (Informal)
[Evaluating Frontier LLM Translation Capability for Lakota](https://preview.aclanthology.org/ingest-acl-workshops/2026.americasnlp-6.2/) (Robertson, AmericasNLP 2026)
ACL