% TACL journal article (ACL Anthology ID 2023.tacl-1.28).
% The url field uses the canonical aclanthology.org address, not a preview-branch build.
@article{si-etal-2023-sub,
  author    = {Si, Chenglei and Zhang, Zhengyan and Chen, Yingfa and Qi, Fanchao and Wang, Xiaozhi and Liu, Zhiyuan and Wang, Yasheng and Liu, Qun and Sun, Maosong},
  title     = {Sub-Character Tokenization for {Chinese} Pretrained Language Models},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {11},
  pages     = {469--487},
  year      = {2023},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  doi       = {10.1162/tacl_a_00560},
  url       = {https://aclanthology.org/2023.tacl-1.28/},
}