@article{guo-etal-2024-deuce,
title = "{DEUCE}: Dual-diversity Enhancement and Uncertainty-awareness for Cold-start Active Learning",
author = "Guo, Jiaxin and
Chen, C. L. Philip and
Li, Shuzhen and
Zhang, Tong",
journal = "Transactions of the Association for Computational Linguistics",
volume = "12",
year = "2024",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2024.tacl-1.94/",
doi = "10.1162/tacl_a_00731",
pages = "1736--1754",
abstract = "Cold-start active learning (CSAL) selects valuable instances from an unlabeled dataset for manual annotation. It provides high-quality data at a low annotation cost for label-scarce text classification. However, existing CSAL methods overlook weak classes and hard representative examples, resulting in biased learning. To address these issues, this paper proposes a novel dual-diversity enhancing and uncertainty-aware (DEUCE) framework for CSAL. Specifically, DEUCE leverages a pretrained language model (PLM) to efficiently extract textual representations, class predictions, and predictive uncertainty. Then, it constructs a Dual-Neighbor Graph (DNG) to combine information on both textual diversity and class diversity, ensuring a balanced data distribution. It further propagates uncertainty information via density-based clustering to select hard representative instances. DEUCE performs well in selecting class-balanced and hard representative data by dual-diversity and informativeness. Experiments on six NLP datasets demonstrate the superiority and efficiency of DEUCE."
}