% Nguyen, Brooke and Baldwin (2017): SCLeM workshop paper, ACL Anthology ID W17-4122.
@inproceedings{nguyen-etal-2017-sub,
    title     = {Sub-character Neural Language Modelling in {Japanese}},
    author    = {Nguyen, Viet and
                 Brooke, Julian and
                 Baldwin, Timothy},
    editor    = {Faruqui, Manaal and
                 Schuetze, Hinrich and
                 Trancoso, Isabel and
                 Yaghoobzadeh, Yadollah},
    booktitle = {Proceedings of the First Workshop on Subword and Character Level Models in {NLP}},
    month     = sep,
    year      = {2017},
    address   = {Copenhagen, Denmark},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/W17-4122/},
    doi       = {10.18653/v1/W17-4122},
    pages     = {148--153},
    abstract  = {In East Asian languages such as Japanese and Chinese, the semantics of a character are (somewhat) reflected in its sub-character elements. This paper examines the effect of using sub-characters for language modeling in Japanese. This is achieved by decomposing characters according to a range of character decomposition datasets, and training a neural language model over variously decomposed character representations. Our results indicate that language modelling can be improved through the inclusion of sub-characters, though this result depends on a good choice of decomposition dataset and the appropriate granularity of decomposition.},
}
Markdown (Informal)
[Sub-character Neural Language Modelling in Japanese](https://aclanthology.org/W17-4122/) (Nguyen et al., SCLeM 2017)
ACL
- Viet Nguyen, Julian Brooke, and Timothy Baldwin. 2017. Sub-character Neural Language Modelling in Japanese. In Proceedings of the First Workshop on Subword and Character Level Models in NLP, pages 148–153, Copenhagen, Denmark. Association for Computational Linguistics.