% LREC 2008 conference paper on Japanese text readability assessment (ACL Anthology ID L08-1230).
@inproceedings{sato-etal-2008-automatic,
  title     = {Automatic Assessment of {Japanese} Text Readability Based on a Textbook Corpus},
  author    = {Sato, Satoshi and
               Matsuyoshi, Suguru and
               Kondoh, Yohsuke},
  editor    = {Calzolari, Nicoletta and
               Choukri, Khalid and
               Maegaard, Bente and
               Mariani, Joseph and
               Odijk, Jan and
               Piperidis, Stelios and
               Tapias, Daniel},
  booktitle = {Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)},
  month     = may,
  year      = {2008},
  address   = {Marrakech, Morocco},
  publisher = {European Language Resources Association (ELRA)},
  url       = {https://aclanthology.org/L08-1230/},
  abstract  = {This paper describes a method of readability measurement of Japanese texts based on a newly compiled textbook corpus. The textbook corpus consists of 1,478 sample passages extracted from 127 textbooks of elementary school, junior high school, high school, and university; it is divided into thirteen grade levels and the total size is about a million characters. For a given text passage, the readability measurement method determines the grade level to which the passage is the most similar by using character-unigram models, which are constructed from the textbook corpus. Because this method does not require sentence-boundary analysis and word-boundary analysis, it is applicable to texts that include incomplete sentences and non-regular text fragments. The performance of this method, which is measured by the correlation coefficient, is considerably high (R {\ensuremath{>}} 0.9); in case that the length of a text passage is limited in 25 characters, the correlation coefficient is still high (R = 0.83).},
}
Markdown (Informal)
[Automatic Assessment of Japanese Text Readability Based on a Textbook Corpus](https://aclanthology.org/L08-1230/) (Sato et al., LREC 2008)
ACL