@inproceedings{ruzsics-samardzic-2017-neural,
title = "Neural Sequence-to-sequence Learning of Internal Word Structure",
author = "Ruzsics, Tatyana and
Samard{\v{z}}i{\'c}, Tanja",
editor = "Levy, Roger and
Specia, Lucia",
booktitle = "Proceedings of the 21st Conference on Computational Natural Language Learning ({C}o{NLL} 2017)",
month = aug,
year = "2017",
address = "Vancouver, Canada",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/K17-1020/",
doi = "10.18653/v1/K17-1020",
pages = "184--194",
abstract = "Learning internal word structure has recently been recognized as an important step in various multilingual processing tasks and in theoretical language comparison. In this paper, we present a neural encoder-decoder model for learning canonical morphological segmentation. Our model combines character-level sequence-to-sequence transformation with a language model over canonical segments. We obtain up to 4{\%} improvement over a strong character-level encoder-decoder baseline for three languages. Our model outperforms the previous state-of-the-art for two languages, while eliminating the need for external resources such as large dictionaries. Finally, by comparing the performance of encoder-decoder and classical statistical machine translation systems trained with and without corpus counts, we show that including corpus counts is beneficial to both approaches."
}