% Wang and Li (2024): EvaHan2024 shared-task system paper, LT4HALA workshop at LREC-COLING 2024.
% The url field uses the canonical ACL Anthology permalink, not a preview-branch link.
@inproceedings{wang-li-2024-two,
  author    = {Wang, Xuebin and
               Li, Zhenghua},
  title     = {Two Sequence Labeling Approaches to Sentence Segmentation and Punctuation Prediction for Classic {Chinese} Texts},
  editor    = {Sprugnoli, Rachele and
               Passarotti, Marco},
  booktitle = {Proceedings of the Third Workshop on Language Technologies for Historical and Ancient Languages (LT4HALA) @ LREC-COLING-2024},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  url       = {https://aclanthology.org/2024.lt4hala-1.28/},
  pages     = {237--241},
  abstract  = {This paper describes our system for the EvaHan2024 shared task. We design and experiment with two sequence labeling approaches, i.e., one-stage and two-stage approaches. The one-stage approach directly predicts a label for each character, and the label may contain multiple punctuation marks. The two-stage approach divides punctuation marks into two classes, i.e., pause and non-pause, and separately handles them via two sequence labeling processes. The labels contain at most one punctuation marks. We use pre-trained SikuRoBERTa as a key component of the encoder and employ a conditional random field (CRF) layer on the top. According to the evaluation metrics adopted by the organizers, the two-stage approach is superior to the one-stage approach, and our system achieves the second place among all participant systems.},
}
Markdown (Informal)
[Two Sequence Labeling Approaches to Sentence Segmentation and Punctuation Prediction for Classic Chinese Texts](https://aclanthology.org/2024.lt4hala-1.28/) (Wang & Li, LT4HALA 2024)
ACL