@comment{
  ustun-etal-2018-characters: workshop paper published in the Proceedings of
  the Third Workshop on Representation Learning for NLP (2018), pages 144--153.
  The url field holds the canonical ACL Anthology landing page rather than a
  preview-branch deployment; the doi field is the persistent identifier.
}
@inproceedings{ustun-etal-2018-characters,
  title     = {Characters or Morphemes: How to Represent Words?},
  author    = {{\"U}st{\"u}n, Ahmet and
               Kurfal{\i}, Murathan and
               Can, Burcu},
  editor    = {Augenstein, Isabelle and
               Cao, Kris and
               He, He and
               Hill, Felix and
               Gella, Spandana and
               Kiros, Jamie and
               Mei, Hongyuan and
               Misra, Dipendra},
  booktitle = {Proceedings of the Third Workshop on Representation Learning for {NLP}},
  month     = jul,
  year      = {2018},
  address   = {Melbourne, Australia},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/W18-3019/},
  doi       = {10.18653/v1/W18-3019},
  pages     = {144--153},
  abstract  = {In this paper, we investigate the effects of using subword information in representation learning. We argue that using syntactic subword units effects the quality of the word representations positively. We introduce a morpheme-based model and compare it against to word-based, character-based, and character n-gram level models. Our model takes a list of candidate segmentations of a word and learns the representation of the word based on different segmentations that are weighted by an attention mechanism. We performed experiments on Turkish as a morphologically rich language and English with a comparably poorer morphology. The results show that morpheme-based models are better at learning word representations of morphologically complex languages compared to character-based and character n-gram level models since the morphemes help to incorporate more syntactic knowledge in learning, that makes morpheme-based models better at syntactic tasks.},
}
Markdown (Informal)
[Characters or Morphemes: How to Represent Words?](https://aclanthology.org/W18-3019/) (Üstün et al., RepL4NLP 2018)
ACL
- Ahmet Üstün, Murathan Kurfalı, and Burcu Can. 2018. Characters or Morphemes: How to Represent Words? In Proceedings of the Third Workshop on Representation Learning for NLP, pages 144–153, Melbourne, Australia. Association for Computational Linguistics.