@inproceedings{dehouck-2023-challenging,
title = "Challenging the {\textquotedblleft}One Single Vector per Token{\textquotedblright} Assumption",
author = "Dehouck, Mathieu",
editor = "Jiang, Jing and
Reitter, David and
Deng, Shumin",
booktitle = "Proceedings of the 27th Conference on Computational Natural Language Learning (CoNLL)",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2023.conll-1.33/",
doi = "10.18653/v1/2023.conll-1.33",
pages = "498--507",
abstract = "In this paper we question the almost universal assumption that in neural networks each token should be represented by a single vector. In fact, it is so natural to use one vector per word that most people do not even consider it as an assumption of their various models. Via a series of experiments on dependency parsing, in which we let each token in a sentence be represented by a sequence of vectors, we show that the {\textquotedblleft}one single vector per token{\textquotedblright} assumption might be too strong for recurrent neural networks. Indeed, biaffine parsers seem to work better when their encoder accesses its input`s tokens' representations in several time steps rather than all at once. This seems to indicate that having only one occasion to look at a token through its vector is too strong a constraint for recurrent neural networks and calls for further studies on the way tokens are fed to neural networks."
}