% Workshop paper record (KnowFM 2026) as exported from the ACL Anthology.
% NOTE(review): the url field points at a preview.aclanthology.org ingest
% branch; confirm the canonical Anthology URL once the volume is published.
@inproceedings{kamel-xu-2026-rsce,
    title     = "{RSCE}: Training-Free Residual Stream Encoding for Persistent Context Amortization",
    author    = "Kamel, Adam and
                 Xu, Eric",
    editor    = "Chen, Canyu and
                 Zhang, Yuji and
                 Li, Zoey Sha and
                 Wang, Zihan and
                 Wang, Qineng and
                 Su, Jinyan and
                 Kargupta, Priyanka and
                 Marjanovi{\'c}, Sara Vera and
                 Pan, Jeff Z. and
                 Bansal, Mohit and
                 Augenstein, Isabelle and
                 Han, Jiawei and
                 Ji, Heng and
                 Li, Manling",
    booktitle = "Proceedings of the 4th Workshop on Towards Knowledgeable Foundation Models ({K}now{FM} 2026)",
    month     = jul,
    year      = "2026",
    address   = "San Diego, California, United States",
    publisher = "Association for Computational Linguistics",
    url       = "https://preview.aclanthology.org/ingest-acl-workshops/2026.knowfm-1.11/",
    pages     = "138--146",
    isbn      = "979-8-89176-403-3",
    abstract  = "A central question in the knowledge lifecycle of language models is how externally injected signals interact with parametric memory accumulated during pretraining. We address this through Residual Stream Context Encoding (RSCE), a training-free method that encodes a context document $ctx$ into a single vector $C \in \mathbb{R}^{d_M}$ via mean-pooling residual stream activations at a calibrated intermediate layer, then injects $C$ as an additive shift at query time. This replaces $O(|T(ctx)|)$ attention prefill with an $O(1)$ operation and reveals a previously undescribed \textit{dual-pathway interference} effect: vector injection alone suppresses parametric recall \textit{below} the question-only baseline across four of five tested architectures. This finding{---}absent in behavioral activation steering{---}provides mechanistic evidence that LLMs maintain separate contextual-retrieval and parametric-recall pathways that compete when externally injected signals are semantically rich but token-precision deficient. A dual-channel design pairing $C$ with a compact explicit fact block $F$ resolves this tension. We evaluate five decoder-only architectures (7B{--}70B) on multi-document QA (LongBench, $n=108$) and six on cross-file code completion (RepoBench-C), comparing against LongLLMLingua and EHPC. At extreme compression ($\sim$99{\%} token reduction), RSCE Vec+F is competitive with EHPC on smaller architectures (LLaMA-8B F1 0.333 vs. EHPC 0.334; DeepSeek-14B both 0.214) while both substantially outperform LongLLMLingua. RSCE is the only method achieving 81{\%} compression at 100{\%} operational reliability on code."
}
Markdown (Informal)
[RSCE: Training-Free Residual Stream Encoding for Persistent Context Amortization](https://preview.aclanthology.org/ingest-acl-workshops/2026.knowfm-1.11/) (Kamel & Xu, KnowFM 2026)
ACL