% Workshop paper: DeepSPIN submission to the SIGMORPHON 2022 shared task on morpheme segmentation.
% The url field uses the canonical ACL Anthology address, not a temporary preview build.
@inproceedings{peters-martins-2022-beyond,
    title     = {Beyond Characters: Subword-level Morpheme Segmentation},
    author    = {Peters, Ben and
                 Martins, Andre F. T.},
    editor    = {Nicolai, Garrett and
                 Chodroff, Eleanor},
    booktitle = {Proceedings of the 19th {SIGMORPHON} Workshop on Computational Research in Phonetics, Phonology, and Morphology},
    month     = jul,
    year      = {2022},
    address   = {Seattle, Washington},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2022.sigmorphon-1.14/},
    doi       = {10.18653/v1/2022.sigmorphon-1.14},
    pages     = {131--138},
    abstract  = {This paper presents DeepSPIN's submissions to the SIGMORPHON 2022 Shared Task on Morpheme Segmentation. We make three submissions, all to the word-level subtask. First, we show that entmax-based sparse sequence-to-sequence models deliver large improvements over conventional softmax-based models, echoing results from other tasks. Then, we challenge the assumption that models for morphological tasks should be trained at the character level by building a transformer that generates morphemes as sequences of unigram language model-induced subwords. This subword transformer outperforms all of our character-level models and wins the word-level subtask. Although we do not submit an official submission to the sentence-level subtask, we show that this subword-based approach is highly effective there as well.},
}
Markdown (Informal)
[Beyond Characters: Subword-level Morpheme Segmentation](https://aclanthology.org/2022.sigmorphon-1.14/) (Peters & Martins, SIGMORPHON 2022)
ACL
- Ben Peters and Andre F. T. Martins. 2022. Beyond Characters: Subword-level Morpheme Segmentation. In Proceedings of the 19th SIGMORPHON Workshop on Computational Research in Phonetics, Phonology, and Morphology, pages 131–138, Seattle, Washington. Association for Computational Linguistics.