% Workshop paper, TLT at SyntaxFest 2025 (ACL Anthology ID 2025.tlt-1.14).
% The url field uses the canonical Anthology address, not a preview build.
@inproceedings{wiechetek-etal-2025-create,
  author    = {Wiechetek, Linda and
               Pirinen, Flammie A and
               Kappfjell, Maja Lisa},
  title     = {How to Create Treebanks without Human Annotators {--} An {Indigenous} Language Grammar Checker for Treebank Construction},
  editor    = {Jablotschkin, Sarah and
               K{\"u}bler, Sandra and
               Zinsmeister, Heike},
  booktitle = {Proceedings of the 23rd International Workshop on Treebanks and Linguistic Theories (TLT, SyntaxFest 2025)},
  month     = aug,
  year      = {2025},
  address   = {Ljubljana, Slovenia},
  publisher = {Association for Computational Linguistics},
  pages     = {119--128},
  isbn      = {979-8-89176-291-6},
  url       = {https://aclanthology.org/2025.tlt-1.14/},
  abstract  = {Creating treebanks for low resource languages is an important task. However, low resource Indigenous language contexts have not only limited resources in terms of text data, but also limited human resources that are available for linguistic annotation. We suggest a work-around by applying a Constraint Grammar operated rule-based dependency parser to do the work of creating a marked-up treebank. However, due to a lot of noise, meaning spelling and grammatical errors in South S{\'a}mi written texts, this tool often fails to create complete and correct trees. As a fix to this, we created a grammar checking tool for the most common South S{\'a}mi grammatical error types, which improves the quality of the dependency parser significantly. As both literacy and normative standards for most Indigenous languages are much more recent than for majority languages, spelling and grammatical variation and errors are a common source of noise, and the application of a correction tool like ours can be useful in the construction of treebanks for these languages.},
}
Markdown (Informal)
[How to Create Treebanks without Human Annotators – An Indigenous Language Grammar Checker for Treebank Construction](https://aclanthology.org/2025.tlt-1.14/) (Wiechetek et al., TLT-SyntaxFest 2025)
ACL