% Conference paper: BEFT (Huang, Balashankar, Aminifar), published in the
% ACL 2026 long-papers proceedings (Volume 1).
%
% NOTE(review): the url field points at a "preview.aclanthology.org/ingest-acl"
% address, which looks like a staging build rather than the permanent page --
% confirm the canonical Anthology URL (and add a doi if one exists).
% NOTE(review): the abstract uses Markdown-style asterisks for emphasis; they
% will print literally if the abstract is ever typeset by LaTeX.
@inproceedings{huang-etal-2026-beft,
    title     = {{BEFT}: Bias-Efficient Fine-Tuning of Language Models in Low-Data Regimes},
    author    = {Huang, Baichuan and
                 Balashankar, Ananth and
                 Aminifar, Amir},
    editor    = {Liakata, Maria and
                 Moreira, Viviane P. and
                 Zhang, Jiajun and
                 Jurgens, David},
    booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
    month     = jul,
    year      = {2026},
    address   = {San Diego, California, United States},
    publisher = {Association for Computational Linguistics},
    url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-long.1799/},
    pages     = {38833--38851},
    isbn      = {979-8-89176-390-6},
    abstract  = {Fine-tuning the bias terms of large language models (LLMs) has the potential to achieve unprecedented parameter efficiency while maintaining competitive performance, particularly in low-data regimes. However, the link between fine-tuning different bias terms (i.e., $\boldsymbol{b}_q$, $\boldsymbol{b}_k$, $\boldsymbol{b}_v$ in the query, key, or value projections) and downstream performance remains largely unclear to date. In this paper, we investigate the link between fine-tuning $\boldsymbol{b}_q$, $\boldsymbol{b}_k$, $\boldsymbol{b}_v$ with the performance of the downstream task. Our key finding is that *directly fine-tuning $\boldsymbol{b}_v$ generally leads to higher downstream performance in low-data regimes, in comparison to $\boldsymbol{b}_q$ and $\boldsymbol{b}_k$*. We extensively evaluate this unique property across a wide range of LLMs spanning encoder-only and decoder-only architectures up to 6.7B parameters (including bias-free LLMs). Our results provide strong evidence for the effectiveness of directly fine-tuning $\boldsymbol{b}_v$ across various downstream tasks. The implementation code is available at https://github.com/whubaichuan/BEFT.},
}
Markdown (Informal)
[BEFT: Bias-Efficient Fine-Tuning of Language Models in Low-Data Regimes](https://preview.aclanthology.org/ingest-acl/2026.acl-long.1799/) (Huang et al., ACL 2026)
ACL