% Findings of ACL 2022 short paper on unsupervised Chinese word segmentation.
% The url field uses the canonical ACL Anthology landing page (not a preview
% build); the doi field holds the bare DOI without a resolver prefix.
@inproceedings{li-etal-2022-unsupervised,
  title     = {Unsupervised {Chinese} Word Segmentation with {BERT} Oriented Probing and Transformation},
  author    = {Li, Wei and
               Song, Yuhan and
               Su, Qi and
               Shao, Yanqiu},
  editor    = {Muresan, Smaranda and
               Nakov, Preslav and
               Villavicencio, Aline},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2022},
  month     = may,
  year      = {2022},
  address   = {Dublin, Ireland},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.findings-acl.310/},
  doi       = {10.18653/v1/2022.findings-acl.310},
  pages     = {3935--3940},
  abstract  = {Word Segmentation is a fundamental step for understanding Chinese language. Previous neural approaches for unsupervised Chinese Word Segmentation (CWS) only exploits shallow semantic information, which can miss important context. Large scale Pre-trained language models (PLM) have achieved great success in many areas because of its ability to capture the deep contextual semantic relation. In this paper, we propose to take advantage of the deep semantic information embedded in PLM (e.g., BERT) with a self-training manner, which iteratively probes and transforms the semantic information in PLM into explicit word segmentation ability. Extensive experiment results show that our proposed approach achieves state-of-the-art F1 score on two CWS benchmark datasets.},
}
Markdown (Informal)
[Unsupervised Chinese Word Segmentation with BERT Oriented Probing and Transformation](https://aclanthology.org/2022.findings-acl.310/) (Li et al., Findings 2022)
ACL