@inproceedings{huang-hollenstein-2023-long,
title = "Long-Range Language Modeling with Selective Cache",
author = "Huang, Xinting and
Hollenstein, Nora",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2023.findings-emnlp.321/",
doi = "10.18653/v1/2023.findings-emnlp.321",
pages = "4838--4858",
abstract = "The computational cost of transformer-based language models grows quadratically with the sequence length. In this paper, we introduce the selective cache, which stores the selected key-value pairs from the previous context. By selecting important key-value pairs the model makes better use of the cache so that in limited cache size, a longer context history can be stored. We design three kinds of selection methods. The first is based on human language processing. The key-value pairs are selected if they correspond to tokens that are fixated longer, as recorded in eye-tracking-while-reading experiments. We also incorporate the cognitively-inspired selection process into the language model as a trainable process, resulting in two additional methods with improved performance. The selection task is converted into a pruning task so they can be trained with differentiable masks. We demonstrate that the proposed selective cache improves the language modeling performance across different datasets. With the same number of stored key-value pairs (cache size), our selective cache outperforms XL cache and compressive cache by considerable margins."
}
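
As a quick orientation for the abstract above: the selective cache replaces recency-based retention (as in an XL-style cache) with importance-based retention of key-value pairs. The following is a minimal sketch of that top-k selection step, not the authors' implementation; the function name select_topk_kv, the NumPy setup, and the random scores are illustrative assumptions standing in for the paper's fixation-based and learned selection methods.

import numpy as np

def select_topk_kv(keys, values, scores, cache_size):
    # Keep only the cache_size highest-scoring key-value pairs.
    # keys, values: (n, d) arrays of per-token keys and values.
    # scores: (n,) importance scores; in the paper these would come from
    # eye-tracking fixation durations or a learned differentiable mask,
    # but here they are just an opaque array.
    idx = np.argsort(scores)[-cache_size:]  # indices of the top-k scores
    idx = np.sort(idx)                      # restore original token order
    return keys[idx], values[idx]

# Toy usage with random stand-in data.
rng = np.random.default_rng(0)
n, d, cache_size = 512, 64, 128
keys = rng.normal(size=(n, d))
values = rng.normal(size=(n, d))
scores = rng.random(n)  # placeholder for fixation-based or learned scores
cached_keys, cached_values = select_topk_kv(keys, values, scores, cache_size)
assert cached_keys.shape == (cache_size, d)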