@inproceedings{shen-huang-2025-llm,
title = "{LLM} Braces: Straightening Out {LLM} Predictions with Relevant Sub-Updates",
author = "Shen, Ying and
Huang, Lifu",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingestion-acl-25/2025.acl-long.393/",
pages = "7977--7992",
ISBN = "979-8-89176-251-0",
abstract = "Recent findings reveal that much of the knowledge in a Transformer-based Large Language Model (LLM) is encoded in its feed-forward (FFN) layers, where each FNN layer can be interpreted as the summation of sub-updates, each corresponding to a weighted column vector from the FFN{'}s value parameter matrix that often encodes human-interpretable concepts. In light of this, we hypothesize that model performance and behaviors can be further enhanced and controlled by modulating the contributions of these sub-updates based on their relevance to the input or target output style, and propose LLMBraces, a novel and efficient method that computes relevance scores associated with value vectors in FFN layers and leverages these scores to dynamically adjust the contribution of sub-updates. By optimizing sub-update contributions, LLMBraces refines the prediction process, leading to more accurate and reliable outputs, much like a `brace' providing support and stability. Moreover, LLMBraces can be extended to support conditional control over generation characteristics, such as sentiment, thereby offering fine-grained steering of LLM outputs. Extensive experiments on various LLMs{---}including Qwen2.5-1.5B, Llama2-7B, and Llama3-8B{---}demonstrate that LLMBraces outperforms baseline approaches in both fine-tuning and zero-shot settings while requiring significantly fewer tunable parameters, up to 75{\%} fewer compared to LoRA. Furthermore, LLMBraces excels in sentiment-controlled generation and toxicity reduction, highlighting its potential for flexible, controlled text generation across applications."
}
Markdown (Informal)
[LLM Braces: Straightening Out LLM Predictions with Relevant Sub-Updates](https://aclanthology.org/2025.acl-long.393/) (Shen & Huang, ACL 2025)
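The sub-update view the abstract builds on (FFN output as a sum of activation-weighted value vectors, with LLMBraces rescaling each term by a relevance score) can be sketched in a few lines. The NumPy snippet below is a minimal illustration under that reading, not the authors' implementation: the `relevance` vector stands in for the learned scores, the 0.2 down-weighting factor is arbitrary, and all names and shapes are hypothetical (value vectors appear here as rows of `W_val`, i.e., columns of its transpose).

```python
import numpy as np

def gelu(x):
    # tanh approximation of GELU, a common FFN activation
    return 0.5 * x * (1.0 + np.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * x**3)))

def ffn_with_relevance(x, W_key, W_val, relevance):
    """FFN output as a relevance-weighted sum of sub-updates.

    x         : (d,)   hidden state entering the FFN
    W_key     : (m, d) first FFN matrix; row i is key vector k_i
    W_val     : (m, d) second FFN matrix; row i is value vector v_i
    relevance : (m,)   per-sub-update scores r_i (all ones = vanilla FFN)
    """
    coeffs = gelu(W_key @ x)          # m_i = f(x . k_i)
    weighted = relevance * coeffs     # r_i * m_i  (the relevance modulation)
    return weighted @ W_val           # sum_i r_i * m_i * v_i

d, m = 16, 64
rng = np.random.default_rng(0)
x = rng.standard_normal(d)
W_key = rng.standard_normal((m, d))
W_val = rng.standard_normal((m, d))

vanilla = ffn_with_relevance(x, W_key, W_val, np.ones(m))
# Down-weight a random half of the sub-updates, standing in for those
# scored as irrelevant to the input or target output style
r = np.where(rng.random(m) < 0.5, 0.2, 1.0)
steered = ffn_with_relevance(x, W_key, W_val, r)
print(vanilla[:4], steered[:4])
```

Setting every relevance score to 1 recovers the ordinary FFN computation, which is why modulating the scores changes only how much each (often human-interpretable) value vector contributes, not the layer's structure.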