% NAACL 2025 short paper, ACL Anthology ID 2025.naacl-short.24.
% The url field uses the canonical Anthology host rather than a preview-branch build.
% Proper nouns in the title are brace-protected as whole words so sentence-casing styles keep them capitalised.
@inproceedings{yang-etal-2025-navajo,
    title = "Is It {Navajo}? Accurate Language Detection for Endangered {Athabaskan} Languages",
    author = "Yang, Ivory and
      Ma, Weicheng and
      Zhang, Chunhui and
      Vosoughi, Soroush",
    editor = "Chiruzzo, Luis and
      Ritter, Alan and
      Wang, Lu",
    booktitle = "Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 2: Short Papers)",
    month = apr,
    year = "2025",
    address = "Albuquerque, New Mexico",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.naacl-short.24/",
    pages = "277--284",
    isbn = "979-8-89176-190-2",
    abstract = "Endangered languages, such as Navajo{---}the most widely spoken Native American language{---}are significantly underrepresented in contemporary language technologies, exacerbating the challenges of their preservation and revitalization. This study evaluates Google{'}s Language Identification (LangID) tool, which does not currently support any Native American languages. To address this, we introduce a random forest classifier trained on Navajo and twenty erroneously suggested languages by LangID. Despite its simplicity, the classifier achieves near-perfect accuracy (97--100{\%}). Additionally, the model demonstrates robustness across other Athabaskan languages{---}a family of Native American languages spoken primarily in Alaska, the Pacific Northwest, and parts of the Southwestern United States{---}suggesting its potential for broader application. Our findings underscore the pressing need for NLP systems that prioritize linguistic diversity and adaptability over centralized, one-size-fits-all solutions, especially in supporting underrepresented languages in a multicultural world. This work directly contributes to ongoing efforts to address cultural biases in language models and advocates for the development of culturally localized NLP tools that serve diverse linguistic communities."
}
Markdown (Informal)
[Is It Navajo? Accurate Language Detection for Endangered Athabaskan Languages](https://aclanthology.org/2025.naacl-short.24/) (Yang et al., NAACL 2025)
ACL
- Ivory Yang, Weicheng Ma, Chunhui Zhang, and Soroush Vosoughi. 2025. Is It Navajo? Accurate Language Detection for Endangered Athabaskan Languages. In Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 2: Short Papers), pages 277–284, Albuquerque, New Mexico. Association for Computational Linguistics.