@inproceedings{li-etal-2025-improving-efficiency,
  title     = {Improving Efficiency in Large Language Models via Extendable Block Floating Point Representation},
  author    = {Li, Dongyang and
               Li, Zeyang and
               Liu, Bosheng and
               Wu, Jigang},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Findings of the Association for Computational Linguistics: {ACL} 2025},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/display_plenaries/2025.findings-acl.768/},
  pages     = {14861--14873},
  isbn      = {979-8-89176-256-5},
  abstract  = {Large language models (LLMs) have revolutionized natural language processing (NLP) tasks, yet their increasing size poses substantial challenges in terms of computational and memory resources. Block floating-point (BFP) arithmetic offers an effective solution by leveraging the strengths of both floating-point and fixed-point representations, leading to reductions in both storage and computational overhead. However, current low-bit BFP quantization approaches often struggle to handle extreme outliers, leading to significant accuracy degradation. To overcome this limitation, we introduce Extendable Exponent Sharing (EES), a novel BFP representation that extends the exponent bit width to capture a wider dynamic range. EES achieves this by embedding extendable exponent bits into the least significant mantissa bits, thereby increasing the shared exponent{'}s bit width without incurring additional storage costs. To optimize the trade-off between accuracy and energy efficiency, EES employs a design space exploration strategy to optimize the configuration of extendable exponent bit widths. Experimental results show that EES outperforms representative baselines in both accuracy and computational efficiency.},
}
@comment{
Markdown (Informal)
[Improving Efficiency in Large Language Models via Extendable Block Floating Point Representation](https://preview.aclanthology.org/display_plenaries/2025.findings-acl.768/) (Li et al., Findings 2025)
ACL
}