% Sahin and Steedman, EMNLP 2018 (ACL Anthology ID D18-1545).
% The url field uses the canonical Anthology address rather than a preview-branch link.
@inproceedings{sahin-steedman-2018-data,
  title     = {Data Augmentation via Dependency Tree Morphing for Low-Resource Languages},
  author    = {{\c{S}}ahin, G{\"o}zde G{\"u}l and
               Steedman, Mark},
  editor    = {Riloff, Ellen and
               Chiang, David and
               Hockenmaier, Julia and
               Tsujii, Jun{'}ichi},
  booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing},
  month     = oct # "--" # nov,
  year      = 2018,
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/D18-1545/},
  doi       = {10.18653/v1/D18-1545},
  pages     = {5004--5009},
  abstract  = {Neural NLP systems achieve high scores in the presence of sizable training dataset. Lack of such datasets leads to poor system performances in the case low-resource languages. We present two simple text augmentation techniques using dependency trees, inspired from image processing. We {\textquotedblleft}crop{\textquotedblright} sentences by removing dependency links, and we {\textquotedblleft}rotate{\textquotedblright} sentences by moving the tree fragments around the root. We apply these techniques to augment the training sets of low-resource languages in Universal Dependencies project. We implement a character-level sequence tagging model and evaluate the augmented datasets on part-of-speech tagging task. We show that crop and rotate provides improvements over the models trained with non-augmented data for majority of the languages, especially for languages with rich case marking systems.},
}
Markdown (Informal)
[Data Augmentation via Dependency Tree Morphing for Low-Resource Languages](https://aclanthology.org/D18-1545/) (Şahin & Steedman, EMNLP 2018)
ACL