@comment{
  Conference paper from the LREC-COLING 2024 main proceedings, pages 385--394.
  Notes for maintainers:
  - url is the canonical ACL Anthology address built from the paper id
    2024.lrec-main.35; the earlier preview.aclanthology.org/fix-sig-urls/
    address was a branch-preview build and not a stable citation target.
  - Whole words are brace-protected in title and booktitle so that styles
    which apply sentence casing keep the capitals of Chinese and LREC-COLING.
  - address holds the value shipped by the Anthology export (conference
    location); NOTE(review): move it to venue/location if the target style
    expects the publisher city here.
}
@inproceedings{blouin-etal-2024-dataset,
  title     = {A Dataset for Named Entity Recognition and Entity Linking in {Chinese} Historical Newspapers},
  author    = {Blouin, Baptiste and
               Armand, C{\'e}cile and
               Henriot, Christian},
  editor    = {Calzolari, Nicoletta and
               Kan, Min-Yen and
               Hoste, Veronique and
               Lenci, Alessandro and
               Sakti, Sakriani and
               Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation ({LREC-COLING} 2024)},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  url       = {https://aclanthology.org/2024.lrec-main.35/},
  pages     = {385--394},
  abstract  = {In this study, we present a novel historical Chinese dataset for named entity recognition, entity linking, coreference and entity relations. We use data from Chinese newspapers from 1872 to 1949 and multilingual bibliographic resources from the same period. The period and the language are the main strength of the present work, offering a resource which covers different styles and language uses, as well as the largest historical Chinese NER dataset with manual annotations from this transitional period. After detailing the selection and annotation process, we present the very first results that can be obtained from this dataset. Texts and annotations are freely downloadable from the GitHub repository.},
}
Markdown (Informal)
[A Dataset for Named Entity Recognition and Entity Linking in Chinese Historical Newspapers](https://aclanthology.org/2024.lrec-main.35/) (Blouin et al., LREC-COLING 2024)
ACL