% Ding, Horbach and Zesch, AACL 2020 conference paper on Chinese content scoring;
% presents the CESA and ASAP-ZH short-answer data sets (see abstract).
% The url field uses the canonical ACL Anthology address, not a preview-branch mirror.
@inproceedings{ding-etal-2020-chinese,
  title     = {{Chinese} Content Scoring: Open-Access Datasets and Features on Different Segmentation Levels},
  author    = {Ding, Yuning and
               Horbach, Andrea and
               Zesch, Torsten},
  editor    = {Wong, Kam-Fai and
               Knight, Kevin and
               Wu, Hua},
  booktitle = {Proceedings of the 1st Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 10th International Joint Conference on Natural Language Processing},
  month     = dec,
  year      = {2020},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2020.aacl-main.37/},
  doi       = {10.18653/v1/2020.aacl-main.37},
  pages     = {347--357},
  abstract  = {In this paper, we analyse the challenges of Chinese content scoring in comparison to English. As a review of prior work for Chinese content scoring shows a lack of open-access data in the field, we present two short-answer data sets for Chinese. The Chinese Educational Short Answers data set (CESA) contains 1800 student answers for five science-related questions. As a second data set, we collected ASAP-ZH with 942 answers by re-using three existing prompts from the ASAP data set. We adapt a state-of-the-art content scoring system for Chinese and evaluate it in several settings on these data sets. Results show that features on lower segmentation levels such as character n-grams tend to have better performance than features on token level.},
}
Markdown (Informal)
[Chinese Content Scoring: Open-Access Datasets and Features on Different Segmentation Levels](https://aclanthology.org/2020.aacl-main.37/) (Ding et al., AACL 2020)
ACL