% EMNLP 2025 main-conference paper (ACL Anthology ID 2025.emnlp-main.613).
@inproceedings{zhang-li-2025-improving,
  title     = {Improving Clustering with Positive Pairs Generated from {LLM}-Driven Labels},
  author    = {Zhang, Xiaotong and
               Li, Ying},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.emnlp-main.613/},
  doi       = {10.18653/v1/2025.emnlp-main.613},
  pages     = {12213--12229},
  isbn      = {979-8-89176-332-6},
  abstract  = {Traditional unsupervised clustering methods, which often rely on contrastive training of embedders, suffer from a lack of label knowledge, resulting in suboptimal performance. Furthermore, the presence of potential false negatives can destabilize the training process. Hence, we propose to improve clustering with Positive Pairs generated from LLM-driven Labels (PPLL). In the proposed framework, LLM is initially employed to cluster the data and generate corresponding mini-cluster labels. Subsequently, positive pairs are constructed based on these labels, and an embedder is trained using BYOL to obviate the need for negative pairs. Following training, the acquired label knowledge is integrated into K-means clustering. This framework enables the integration of label information throughout the training and inference processes, while mitigating the reliance on negative pairs. Additionally, it generates interpretable labels for improved understanding of clustering results. Empirical evaluations on a range of datasets demonstrate that our proposed framework consistently surpasses state-of-the-art baselines, achieving superior performance, robustness, and computational efficiency for diverse text clustering applications.},
}
Markdown (Informal)
[Improving Clustering with Positive Pairs Generated from LLM-Driven Labels](https://aclanthology.org/2025.emnlp-main.613/) (Zhang & Li, EMNLP 2025)
ACL