@inproceedings{yun-etal-2023-focus,
title = "Focus on the Core: Efficient Attention via Pruned Token Compression for Document Classification",
author = "Yun, Jungmin and
Kim, Mihyeon and
Kim, Youngbin",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2023.findings-emnlp.909/",
doi = "10.18653/v1/2023.findings-emnlp.909",
pages = "13617--13628",
abstract = "Transformer-based models have achieved dominant performance in numerous NLP tasks. Despite their remarkable successes, pre-trained transformers such as BERT suffer from a computationally expensive self-attention mechanism that interacts with all tokens, including the ones unfavorable to classification performance. To overcome these challenges, we propose integrating two strategies: token pruning and token combining. Token pruning eliminates less important tokens in the attention mechanism{'}s key and value as they pass through the layers. Additionally, we adopt fuzzy logic to handle uncertainty and alleviate potential mispruning risks arising from an imbalanced distribution of each token{'}s importance. Token combining, on the other hand, condenses input sequences into smaller sizes in order to further compress the model. By integrating these two approaches, we not only improve the model{'}s performance but also reduce its computational demands. Experiments with various datasets demonstrate superior performance compared to baseline models, especially with the best improvement over the existing BERT model, achieving +5{\%}p in accuracy and +5.6{\%}p in F1 score. Additionally, memory cost is reduced to 0.61x, and a speedup of 1.64x is achieved."
}
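
A minimal sketch (not the authors' implementation) of the core idea the abstract describes: scoring key/value tokens by the attention they receive and keeping only the top-scoring ones before attention is recomputed. The fuzzy-logic pruning criterion and the token-combining step from the paper are not reproduced; keep_ratio and pruned_attention are illustrative names introduced here.

# Assumes a generic single-head attention setting; PyTorch is used only for illustration.
import torch
import torch.nn.functional as F

def pruned_attention(q, k, v, keep_ratio=0.5):
    """Attention that retains only the most-attended key/value tokens.

    q, k, v: tensors of shape (seq_len, d); keep_ratio: fraction of tokens kept.
    """
    d = q.size(-1)
    scores = q @ k.transpose(-2, -1) / d ** 0.5   # (seq, seq)
    attn = F.softmax(scores, dim=-1)

    # Importance of each key token = total attention it receives from all queries.
    importance = attn.sum(dim=0)                  # (seq,)
    n_keep = max(1, int(keep_ratio * k.size(0)))
    keep_idx = importance.topk(n_keep).indices.sort().values

    # Recompute attention over the retained key/value tokens only.
    k_p, v_p = k[keep_idx], v[keep_idx]
    scores_p = q @ k_p.transpose(-2, -1) / d ** 0.5
    return F.softmax(scores_p, dim=-1) @ v_p      # (seq, d)

if __name__ == "__main__":
    torch.manual_seed(0)
    q, k, v = (torch.randn(8, 16) for _ in range(3))
    print(pruned_attention(q, k, v, keep_ratio=0.5).shape)  # torch.Size([8, 16])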