@inproceedings{zhang-etal-2023-pieclass,
title = "{PIEC}lass: Weakly-Supervised Text Classification with Prompting and Noise-Robust Iterative Ensemble Training",
author = "Zhang, Yunyi and
Jiang, Minhao and
Meng, Yu and
Zhang, Yu and
Han, Jiawei",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/add-emnlp-2024-awards/2023.emnlp-main.780/",
doi = "10.18653/v1/2023.emnlp-main.780",
pages = "12655--12670",
abstract = "Weakly-supervised text classification trains a classifier using the label name of each target class as the only supervision, which largely reduces human annotation efforts. Most existing methods first use the label names as static keyword-based features to generate pseudo labels, which are then used for final classifier training. While reasonable, such a commonly adopted framework suffers from two limitations: (1) keywords can have different meanings in different contexts and some text may not have any keyword, so keyword matching can induce noisy and inadequate pseudo labels; (2) the errors made in the pseudo label generation stage will directly propagate to the classifier training stage without a chance of being corrected. In this paper, we propose a new method, PIEClass, consisting of two modules: (1) a pseudo label acquisition module that uses zero-shot prompting of pre-trained language models (PLM) to get pseudo labels based on contextualized text understanding beyond static keyword matching, and (2) a noise-robust iterative ensemble training module that iteratively trains classifiers and updates pseudo labels by utilizing two PLM fine-tuning methods that regularize each other. Extensive experiments show that PIEClass achieves overall better performance than existing strong baselines on seven benchmark datasets and even achieves similar performance to fully-supervised classifiers on sentiment classification tasks."
}
Markdown (Informal)
[PIEClass: Weakly-Supervised Text Classification with Prompting and Noise-Robust Iterative Ensemble Training](https://preview.aclanthology.org/add-emnlp-2024-awards/2023.emnlp-main.780/) (Zhang et al., EMNLP 2023)
ACL