% Melamud, Bornea & Barker -- EMNLP-IJCNLP 2019 paper (ACL Anthology ID D19-1401).
% url is the canonical ACL Anthology landing page; doi is stored bare (no resolver prefix).
@inproceedings{melamud-etal-2019-combining,
    title     = {Combining Unsupervised Pre-training and Annotator Rationales to Improve Low-shot Text Classification},
    author    = {Melamud, Oren and
                 Bornea, Mihaela and
                 Barker, Ken},
    editor    = {Inui, Kentaro and
                 Jiang, Jing and
                 Ng, Vincent and
                 Wan, Xiaojun},
    booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)},
    month     = nov,
    year      = {2019},
    address   = {Hong Kong, China},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/D19-1401/},
    doi       = {10.18653/v1/D19-1401},
    pages     = {3884--3893},
    abstract  = {Supervised learning models often perform poorly at low-shot tasks, i.e. tasks for which little labeled data is available for training. One prominent approach for improving low-shot learning is to use unsupervised pre-trained neural models. Another approach is to obtain richer supervision by collecting annotator rationales (explanations supporting label annotations). In this work, we combine these two approaches to improve low-shot text classification with two novel methods: a simple bag-of-words embedding approach; and a more complex context-aware method, based on the BERT model. In experiments with two English text classification datasets, we demonstrate substantial performance gains from combining pre-training with rationales. Furthermore, our investigation of a range of train-set sizes reveals that the simple bag-of-words approach is the clear top performer when there are only a few dozen training instances or less, while more complex models, such as BERT or CNN, require more training data to shine.}
}
Markdown (Informal)
[Combining Unsupervised Pre-training and Annotator Rationales to Improve Low-shot Text Classification](https://aclanthology.org/D19-1401/) (Melamud et al., EMNLP-IJCNLP 2019)
ACL