@inproceedings{wu-etal-2022-text,
    title     = {Text Smoothing: Enhance Various Data Augmentation Methods on Text Classification Tasks},
    author    = {Wu, Xing and
                 Gao, Chaochen and
                 Lin, Meng and
                 Zang, Liangjun and
                 Hu, Songlin},
    editor    = {Muresan, Smaranda and
                 Nakov, Preslav and
                 Villavicencio, Aline},
    booktitle = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
    month     = may,
    year      = {2022},
    address   = {Dublin, Ireland},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2022.acl-short.97/},
    doi       = {10.18653/v1/2022.acl-short.97},
    pages     = {871--875},
    abstract  = {Before entering the neural network, a token needs to be converted to its one-hot representation, which is a discrete distribution of the vocabulary. Smoothed representation is the probability of candidate tokens obtained from the pre-trained masked language model, which can be seen as a more informative augmented substitution to the one-hot representation. We propose an efficient data augmentation method, dub as text smoothing, by converting a sentence from its one-hot representation to controllable smoothed representation. We evaluate text smoothing on different datasets in a low-resource regime. Experimental results show that text smoothing outperforms various mainstream data augmentation methods by a substantial margin. Moreover, text smoothing can be combined with these data augmentation methods to achieve better performance.},
}
Markdown (Informal)
[Text Smoothing: Enhance Various Data Augmentation Methods on Text Classification Tasks](https://aclanthology.org/2022.acl-short.97/) (Wu et al., ACL 2022)
ACL