@comment{Review fixes: surname of the second author is "Chen" (C. L. Philip Chen),
  not "Philip Chen"; URL canonicalised from a preview/staging anthology build;
  the system name "Deuce" is brace-protected against sentence-casing styles.}
@article{guo-etal-2024-deuce,
  author    = {Guo, Jiaxin and
               Chen, C. L. Philip and
               Li, Shuzhen and
               Zhang, Tong},
  title     = {{Deuce}: Dual-diversity Enhancement and Uncertainty-awareness for Cold-start Active Learning},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {12},
  year      = {2024},
  address   = {Cambridge, MA},
  publisher = {MIT Press},
  url       = {https://aclanthology.org/2024.tacl-1.94/},
  doi       = {10.1162/tacl_a_00731},
  pages     = {1736--1754},
  abstract  = {Cold-start active learning (CSAL) selects valuable instances from an unlabeled dataset for manual annotation. It provides high-quality data at a low annotation cost for label-scarce text classification. However, existing CSAL methods overlook weak classes and hard representative examples, resulting in biased learning. To address these issues, this paper proposes a novel dual-diversity enhancing and uncertainty-aware (Deuce) framework for CSAL. Specifically, Deuce leverages a pretrained language model (PLM) to efficiently extract textual representations, class predictions, and predictive uncertainty. Then, it constructs a Dual-Neighbor Graph (DNG) to combine information on both textual diversity and class diversity, ensuring a balanced data distribution. It further propagates uncertainty information via density-based clustering to select hard representative instances. Deuce performs well in selecting class-balanced and hard representative data by dual-diversity and informativeness. Experiments on six NLP datasets demonstrate the superiority and efficiency of Deuce.},
}
Markdown (Informal)
[Deuce: Dual-diversity Enhancement and Uncertainty-awareness for Cold-start Active Learning](https://aclanthology.org/2024.tacl-1.94/) (Guo et al., TACL 2024)
ACL