% fastHan system-demonstration paper (ACL-IJCNLP 2021), ACL Anthology ID 2021.acl-demo.12.
% The url field uses the canonical Anthology address, not a temporary preview-branch link.
@inproceedings{geng-etal-2021-fasthan,
    title     = {{fastHan}: A {BERT}-based Multi-Task Toolkit for {Chinese} {NLP}},
    author    = {Geng, Zhichao and
                 Yan, Hang and
                 Qiu, Xipeng and
                 Huang, Xuanjing},
    editor    = {Ji, Heng and
                 Park, Jong C. and
                 Xia, Rui},
    booktitle = {Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing: System Demonstrations},
    month     = aug,
    year      = {2021},
    address   = {Online},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2021.acl-demo.12/},
    doi       = {10.18653/v1/2021.acl-demo.12},
    pages     = {99--106},
    abstract  = {We present fastHan, an open-source toolkit for four basic tasks in Chinese natural language processing: Chinese word segmentation (CWS), Part-of-Speech (POS) tagging, named entity recognition (NER), and dependency parsing. The backbone of fastHan is a multi-task model based on a pruned BERT, which uses the first 8 layers in BERT. We also provide a 4-layer base model compressed from the 8-layer model. The joint-model is trained and evaluated on 13 corpora of four tasks, yielding near state-of-the-art (SOTA) performance in dependency parsing and NER, achieving SOTA performance in CWS and POS. Besides, fastHan{'}s transferability is also strong, performing much better than popular segmentation tools on a non-training corpus. To better meet the need of practical application, we allow users to use their own labeled data to further fine-tune fastHan. In addition to its small size and excellent performance, fastHan is user-friendly. Implemented as a python package, fastHan isolates users from the internal technical details and is convenient to use. The project is released on Github.},
}
Markdown (Informal)
[fastHan: A BERT-based Multi-Task Toolkit for Chinese NLP](https://aclanthology.org/2021.acl-demo.12/) (Geng et al., ACL-IJCNLP 2021)
ACL
- Zhichao Geng, Hang Yan, Xipeng Qiu, and Xuanjing Huang. 2021. fastHan: A BERT-based Multi-Task Toolkit for Chinese NLP. In Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing: System Demonstrations, pages 99–106, Online. Association for Computational Linguistics.