@inproceedings{ji-etal-2021-spellbert,
title = "{S}pell{BERT}: A Lightweight Pretrained Model for {C}hinese Spelling Check",
author = "Ji, Tuo and
Yan, Hang and
Qiu, Xipeng",
editor = "Moens, Marie-Francine and
Huang, Xuanjing and
Specia, Lucia and
Yih, Scott Wen-tau",
booktitle = "Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2021",
address = "Online and Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2021.emnlp-main.287/",
doi = "10.18653/v1/2021.emnlp-main.287",
pages = "3544--3551",
    abstract = "Chinese Spelling Check (CSC) is the task of detecting and correcting Chinese spelling errors. Many models utilize a predefined confusion set to learn a mapping between correct characters and their visually or phonetically similar misuses, but the mapping may be out-of-domain. To address this, we propose SpellBERT, a pretrained model with graph-based extra features that is independent of the confusion set. To explicitly capture the two erroneous patterns, we employ a graph neural network to introduce radical and pinyin information as visual and phonetic features. To better fuse these features with character representations, we devise pre-training tasks similar to masked language modeling. With this feature-rich pre-training, SpellBERT, at only half the size of BERT, shows competitive performance and achieves a state-of-the-art result on the OCR dataset, where most of the errors are not covered by the existing confusion set."
}