% CCL 2024 main-conference paper on building a word-segmented, POS-tagged
% corpus of Ancient Chinese (ACL Anthology ID 2024.ccl-1.64).
% NOTE(review): the author is stored as family name "Yonghong", given name
% "Ke". If the byline is the surname-first form "Ke Yonghong", this should be
% "Ke, Yonghong" -- confirm against the paper before changing. The citation
% key is kept as-is so existing citations continue to resolve.
@inproceedings{yonghong-2024-shang,
    title     = {上古汉语分词和词性标注语料库的构建(Construction of {Ancient} {Chinese} Word Segmentation and Part-Of-Speech Corpus)},
    author    = {Yonghong, Ke},
    editor    = {Sun, Maosong and
                 Liang, Jiye and
                 Han, Xianpei and
                 Liu, Zhiyuan and
                 He, Yulan},
    booktitle = {Proceedings of the 23rd Chinese National Conference on Computational Linguistics (Volume 1: Main Conference)},
    month     = jul,
    year      = {2024},
    address   = {Taiyuan, China},
    publisher = {Chinese Information Processing Society of China},
    url       = {https://aclanthology.org/2024.ccl-1.64/},
    pages     = {819--829},
    language  = {zho},
    abstract  = {针对国内尚无开放的大规模上古汉语分词及词性标注语料库可用的问题,提出以人工为主+机器辅助的标注模式,构建一个包括46部文献的上古汉语分词及词性标记语料库。描述了语料选择、文本分词、词性标注和质量控制等建库过程,分析了该语料库词长、词频、词用等分布,评估了标注质量。已经完成标注的语料库包括323余万字、217万余词。与EvaHan2022基测集和盲测集的分词、词性标注一致度分别为93.70\%、89.49\%和92.83\%、89.86\%。该语料库可用于古汉语研究、辞书编撰、语言教学、人工智能等多个领域。},
}
Markdown (Informal)
[上古汉语分词和词性标注语料库的构建(Construction of Ancient Chinese Word Segmentation and Part-Of-Speech Corpus)](https://aclanthology.org/2024.ccl-1.64/) (Yonghong, CCL 2024)
ACL