@inproceedings{ding-etal-2026-self,
    title = {Self-supervised Data Augmentation for Text Classification in Low-Data Settings},
    author = {Ding, Deyu and
      Wang, Mengying and
      Spitz, Andreas},
    editor = {Piperidis, Stelios and
      Bel, N{\'u}ria and
      van den Heuvel, Henk and
      Ide, Nancy and
      Krek, Simon and
      Toral, Antonio},
    booktitle = {International Conference on Language Resources and Evaluation},
    volume = {main},
    month = may,
    year = {2026},
    address = {Palma de Mallorca, Spain},
    publisher = {ELRA Language Resource Association},
    url = {https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.788/},
    pages = {10046--10056},
    abstract = {Due to data sparsity and high annotation cost, data augmentation has established itself as an effective tool for boosting model performance on supervised NLP tasks. Where task-agnostic augmentation methods tend to act as simple regularizers for the data, task-aware methods also leverage labels for the generation of data that are most suitable for downstream tasks. While prior work has investigated generation and sampling strategies individually, the potential of a self-supervised approach that leverages multiple pre-trained models in generation and sampling remains underexplored. To address this issue, we present an ensemble-based framework of language models that proposes augmentation candidates and internally reviews their suitability for low-resource text classification tasks. We evaluate our model on six classification benchmarks and find that it consistently outperforms state-of-the-art data augmentation baselines in classification accuracy by an average of 0.97 points in low-data scenarios.}
}
Markdown (Informal)
[Self-supervised Data Augmentation for Text Classification in Low-Data Settings](https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.788/) (Ding et al., LREC 2026)
ACL