@inproceedings{lee-etal-2024-qeft,
title = "{QEFT}: Quantization for Efficient Fine-Tuning of {LLM}s",
author = "Lee, Changhun and
Jin, Jun-gyu and
Cho, YoungHyun and
Park, Eunhyeok",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2024.findings-emnlp.811/",
doi = "10.18653/v1/2024.findings-emnlp.811",
pages = "13823--13837",
abstract = "With the rapid growth in the use of fine-tuning for large language models (LLMs), optimizing fine-tuning while keeping inference efficient has become highly important. However, this is a challenging task as it requires improvements in all aspects, including inference speed, fine-tuning speed, memory consumption, and, most importantly, model quality. Previous studies have attempted to achieve this by combining quantization with fine-tuning, but they have failed to enhance all four aspects simultaneously. In this study, we propose a new lightweight technique called Quantization for Efficient Fine-Tuning (QEFT). QEFT accelerates both inference and fine-tuning, is supported by robust theoretical foundations, offers high flexibility, and maintains good hardware compatibility. Our extensive experiments demonstrate that QEFT matches the quality and versatility of full-precision parameter-efficient fine-tuning, while using fewer resources. Our code is available at https://github.com/xvyaward/qeft."
}
Markdown (Informal)
[QEFT: Quantization for Efficient Fine-Tuning of LLMs](https://aclanthology.org/2024.findings-emnlp.811/) (Lee et al., Findings 2024)
ACL
Changhun Lee, Jun-gyu Jin, YoungHyun Cho, and Eunhyeok Park. 2024. QEFT: Quantization for Efficient Fine-Tuning of LLMs. In Findings of the Association for Computational Linguistics: EMNLP 2024, pages 13823–13837, Miami, Florida, USA. Association for Computational Linguistics.