% von Prince & Nordhoff (2020): conference paper in the LREC 2020 proceedings.
% The url field holds the permanent ACL Anthology address for paper
% 2020.lrec-1.338 (not a per-branch preview build, which can disappear).
@inproceedings{von-prince-nordhoff-2020-empirical,
  title     = {An Empirical Evaluation of Annotation Practices in Corpora from Language Documentation},
  author    = {von Prince, Kilu and
               Nordhoff, Sebastian},
  editor    = {Calzolari, Nicoletta and
               B{\'e}chet, Fr{\'e}d{\'e}ric and
               Blache, Philippe and
               Choukri, Khalid and
               Cieri, Christopher and
               Declerck, Thierry and
               Goggi, Sara and
               Isahara, Hitoshi and
               Maegaard, Bente and
               Mariani, Joseph and
               Mazo, H{\'e}l{\`e}ne and
               Moreno, Asuncion and
               Odijk, Jan and
               Piperidis, Stelios},
  booktitle = {Proceedings of the Twelfth Language Resources and Evaluation Conference},
  month     = may,
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  url       = {https://aclanthology.org/2020.lrec-1.338/},
  pages     = {2778--2787},
  language  = {eng},
  isbn      = {979-10-95546-34-4},
  abstract  = {For most of the world{'}s languages, no primary data are available, even as many languages are disappearing. Throughout the last two decades, however, language documentation projects have produced substantial amounts of primary data from a wide variety of endangered languages. These resources are still in the early days of their exploration. One of the factors that makes them hard to use is a relative lack of standardized annotation conventions. In this paper, we will describe common practices in existing corpora in order to facilitate their future processing. After a brief introduction of the main formats used for annotation files, we will focus on commonly used tiers in the widespread ELAN and Toolbox formats. Minimally, corpora from language documentation contain a transcription tier and an aligned translation tier, which means they constitute parallel corpora. Additional common annotations include named references, morpheme separation, morpheme-by-morpheme glosses, part-of-speech tags and notes.},
}
Markdown (Informal)
[An Empirical Evaluation of Annotation Practices in Corpora from Language Documentation](https://aclanthology.org/2020.lrec-1.338/) (von Prince & Nordhoff, LREC 2020)
ACL