% Wang, Shang and Zhong, EMNLP 2023 (ACL Anthology ID 2023.emnlp-main.657).
% The url field uses the canonical aclanthology.org address; links under
% preview.aclanthology.org refer to staging builds and are not stable citation targets.
@inproceedings{wang-etal-2023-goal,
  title     = {Goal-Driven Explainable Clustering via Language Descriptions},
  author    = {Wang, Zihan and
               Shang, Jingbo and
               Zhong, Ruiqi},
  editor    = {Bouamor, Houda and
               Pino, Juan and
               Bali, Kalika},
  booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing},
  month     = dec,
  year      = {2023},
  address   = {Singapore},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.emnlp-main.657/},
  doi       = {10.18653/v1/2023.emnlp-main.657},
  pages     = {10626--10649},
  abstract  = {Unsupervised clustering is widely used to explore large corpora, but existing formulations neither consider the users' goals nor explain clusters' meanings. We propose a new task formulation, {\textquotedblleft}Goal-Driven Clustering with Explanations{\textquotedblright} (GoalEx), which represents both the goal and the explanations as free-form language descriptions. For example, to categorize the errors made by a summarization system, the input to GoalEx is a corpus of annotator-written comments for system-generated summaries and a goal description {\textquotedblleft}cluster the comments based on why the annotators think the summary is imperfect.{\textquotedblright}; the outputs are text clusters each with an explanation ({\textquotedblleft}this cluster mentions that the summary misses important context information.{\textquotedblright}), which relates to the goal and accurately explains which comments should (not) belong to a cluster. To tackle GoalEx, we prompt a language model with {\textquotedblleft}[corpus subset] + [goal] + Brainstorm a list of explanations each representing a cluster.{\textquotedblright}; then we classify whether each sample belongs to a cluster based on its explanation; finally, we use integer linear programming to select a subset of candidate clusters to cover most samples while minimizing overlaps. Under both automatic and human evaluation on corpora with or without labels, our method produces more accurate and goal-related explanations than prior methods.},
}
Markdown (Informal)
[Goal-Driven Explainable Clustering via Language Descriptions](https://aclanthology.org/2023.emnlp-main.657/) (Wang et al., EMNLP 2023)
ACL