@inproceedings{murawaki-kurohashi-2010-online,
title = "Online {J}apanese Unknown Morpheme Detection using Orthographic Variation",
author = "Murawaki, Yugo and
Kurohashi, Sadao",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Rosner, Mike and
Tapias, Daniel",
booktitle = "Proceedings of the Seventh International Conference on Language Resources and Evaluation ({LREC}'10)",
month = may,
year = "2010",
address = "Valletta, Malta",
publisher = "European Language Resources Association (ELRA)",
url = "https://preview.aclanthology.org/fix-sig-urls/L10-1111/",
abstract = "To solve the unknown morpheme problem in Japanese morphological analysis, we previously proposed a novel framework of online unknown morpheme acquisition and its implementation. This framework poses a previously unexplored problem, online unknown morpheme detection. Online unknown morpheme detection is a task of finding morphemes in each sentence that are not listed in a given lexicon. Unlike in English, it is a non-trivial task because Japanese does not delimit words by white space. We first present a baseline method that simply uses the output of the morphological analyzer. We then show that it fails to detect some unknown morphemes because they are over-segmented into shorter registered morphemes. To cope with this problem, we present a simple solution, the use of orthographic variation of Japanese. Under the assumption that orthographic variants behave similarly, each over-segmentation candidate is checked against its counterparts. Experiments show that the proposed method improves the recall of detection and contributes to improving unknown morpheme acquisition."
}
Markdown (Informal)
[Online Japanese Unknown Morpheme Detection using Orthographic Variation](https://preview.aclanthology.org/fix-sig-urls/L10-1111/) (Murawaki & Kurohashi, LREC 2010)
ACL