% ACL 2023 long paper (Volume 1: Long Papers). The url field points at the
% canonical ACL Anthology record for this paper, not a temporary preview branch.
@inproceedings{chou-etal-2023-advancing,
  title     = {Advancing Multi-Criteria {Chinese} Word Segmentation Through Criterion Classification and Denoising},
  author    = {Chou, Tzu Hsuan and
               Lin, Chun-Yi and
               Kao, Hung-Yu},
  editor    = {Rogers, Anna and
               Boyd-Graber, Jordan and
               Okazaki, Naoaki},
  booktitle = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2023},
  address   = {Toronto, Canada},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.acl-long.356/},
  doi       = {10.18653/v1/2023.acl-long.356},
  pages     = {6460--6476},
  abstract  = {Recent research on multi-criteria Chinese word segmentation (MCCWS) mainly focuses on building complex private structures, adding more handcrafted features, or introducing complex optimization processes. In this work, we show that through a simple yet elegant input-hint-based MCCWS model, we can achieve state-of-the-art (SoTA) performances on several datasets simultaneously. We further propose a novel criterion-denoising objective that hurts slightly on F1 score but achieves SoTA recall on out-of-vocabulary words. Our result establishes a simple yet strong baseline for future MCCWS research. Source code is available at \url{https://github.com/IKMLab/MCCWS}.},
}
Markdown (Informal)
[Advancing Multi-Criteria Chinese Word Segmentation Through Criterion Classification and Denoising](https://aclanthology.org/2023.acl-long.356/) (Chou et al., ACL 2023)
ACL