% Workshop paper from the UDW 2020 proceedings; metadata as listed in the ACL Anthology.
% The url field uses the canonical Anthology address rather than a preview/staging build.
@inproceedings{ek-bernardy-2020-composing,
  title     = {Composing Byte-Pair Encodings for Morphological Sequence Classification},
  author    = {Ek, Adam and Bernardy, Jean-Philippe},
  editor    = {de Marneffe, Marie-Catherine and de Lhoneux, Miryam and Nivre, Joakim and Schuster, Sebastian},
  booktitle = {Proceedings of the Fourth Workshop on {Universal} {Dependencies} ({UDW} 2020)},
  month     = dec,
  year      = {2020},
  address   = {Barcelona, Spain (Online)},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2020.udw-1.9/},
  pages     = {76--86},
  abstract  = {Byte-pair encodings is a method for splitting a word into sub-word tokens, a language model then assigns contextual representations separately to each of these tokens. In this paper, we evaluate four different methods of composing such sub-word representations into word representations. We evaluate the methods on morphological sequence classification, the task of predicting grammatical features of a word. Our experiments reveal that using an RNN to compute word representations is consistently more effective than the other methods tested across a sample of eight languages with different typology and varying numbers of byte-pair tokens per word.},
}
Markdown (Informal)
[Composing Byte-Pair Encodings for Morphological Sequence Classification](https://aclanthology.org/2020.udw-1.9/) (Ek & Bernardy, UDW 2020)
ACL