@inproceedings{yang-etal-2024-hs,
  title     = {{HS}-{GC}: Holistic Semantic Embedding and Global Contrast for Effective Text Clustering},
  author    = {Yang, Chen and
               Cao, Bin and
               Fan, Jing},
  editor    = {Calzolari, Nicoletta and
               Kan, Min-Yen and
               Hoste, Veronique and
               Lenci, Alessandro and
               Sakti, Sakriani and
               Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  url       = {https://aclanthology.org/2024.lrec-main.732/},
  pages     = {8349--8359},
  abstract  = {In this paper, we introduce Holistic Semantic Embedding and Global Contrast (HS-GC), an end-to-end approach to learn the instance- and cluster-level representation. Specifically, for instance-level representation learning, we introduce a new loss function that exploits different layers of semantic information in a deep neural network to provide a more holistic semantic text representation. Contrastive learning is applied to these representations to improve the model{'}s ability to represent text instances. Additionally, for cluster-level representation learning we propose two strategies that utilize global update to construct cluster centers from a global view. The extensive experimental evaluation on five text datasets shows that our method outperforms the state-of-the-art model. Particularly on the SearchSnippets dataset, our method leads by 4.4{\%} in normalized mutual information against the latest comparison method. On the StackOverflow and TREC datasets, our method improves the clustering accuracy of 5.9{\%} and 3.2{\%}, respectively.},
}
Markdown (Informal)
[HS-GC: Holistic Semantic Embedding and Global Contrast for Effective Text Clustering](https://aclanthology.org/2024.lrec-main.732/) (Yang et al., LREC-COLING 2024)
ACL