% Conference paper from the COLING 2020 main proceedings (ACL Anthology ID 2020.coling-main.339).
% The url field holds the canonical ACL Anthology landing page; the doi is the stable identifier.
@inproceedings{dehouck-gomez-rodriguez-2020-data,
  title     = {Data Augmentation via Subtree Swapping for Dependency Parsing of Low-Resource Languages},
  author    = {Dehouck, Mathieu and
               G{\'o}mez-Rodr{\'i}guez, Carlos},
  editor    = {Scott, Donia and
               Bel, Nuria and
               Zong, Chengqing},
  booktitle = {Proceedings of the 28th International Conference on Computational Linguistics},
  month     = dec,
  year      = {2020},
  address   = {Barcelona, Spain (Online)},
  publisher = {International Committee on Computational Linguistics},
  url       = {https://aclanthology.org/2020.coling-main.339/},
  doi       = {10.18653/v1/2020.coling-main.339},
  pages     = {3818--3830},
  abstract  = {The lack of annotated data is a big issue for building reliable NLP systems for most of the world{'}s languages. But this problem can be alleviated by automatic data generation. In this paper, we present a new data augmentation method for artificially creating new dependency-annotated sentences. The main idea is to swap subtrees between annotated sentences while enforcing strong constraints on those trees to ensure maximal grammaticality of the new sentences. We also propose a method to perform low-resource experiments using resource-rich languages by mimicking low-resource languages by sampling sentences under a low-resource distribution. In a series of experiments, we show that our newly proposed data augmentation method outperforms previous proposals using the same basic inputs.},
}
Markdown (Informal)
[Data Augmentation via Subtree Swapping for Dependency Parsing of Low-Resource Languages](https://aclanthology.org/2020.coling-main.339/) (Dehouck & Gómez-Rodríguez, COLING 2020)
ACL