@inproceedings{yang-li-2023-boosting,
  title     = {Boosting Text Augmentation via Hybrid Instance Filtering Framework},
  author    = {Yang, Heng and
               Li, Ke},
  editor    = {Rogers, Anna and
               Boyd-Graber, Jordan and
               Okazaki, Naoaki},
  booktitle = {Findings of the Association for Computational Linguistics: {ACL} 2023},
  month     = jul,
  year      = {2023},
  address   = {Toronto, Canada},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.findings-acl.105/},
  doi       = {10.18653/v1/2023.findings-acl.105},
  pages     = {1652--1669},
  abstract  = {Text augmentation is an effective technique for addressing the problem of insufficient data in natural language processing. However, existing text augmentation methods tend to focus on few-shot scenarios and usually perform poorly on large public datasets. Our research indicates that existing augmentation methods often generate instances with shifted feature spaces, which leads to a drop in performance on the augmented data (for example, EDA generally loses approximately 2{\%} in aspect-based sentiment classification). To address this problem, we propose a hybrid instance-filtering framework (BoostAug) based on pre-trained language models that can maintain a similar feature space with natural datasets. BoostAug is transferable to existing text augmentation methods (such as synonym substitution and back translation) and significantly improves the augmentation performance by 2-3{\%} in classification accuracy. Our experimental results on three classification tasks and nine public datasets show that BoostAug addresses the performance drop problem and outperforms state-of-the-art text augmentation methods. Additionally, we release the code to help improve existing augmentation methods on large datasets.},
}
Markdown (Informal)
[Boosting Text Augmentation via Hybrid Instance Filtering Framework](https://aclanthology.org/2023.findings-acl.105/) (Yang & Li, Findings 2023)
ACL