@inproceedings{shihab-etal-2025-cache,
title = "Cache-Efficient Posterior Sampling for Reinforcement Learning with {LLM}-Derived Priors Across Discrete and Continuous Domains",
author = "Shihab, Ibne Farabi and
Akter, Sanjeda and
Sharma, Anuj",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.emnlp-main.560/",
doi = "10.18653/v1/2025.emnlp-main.560",
pages = "11062--11090",
ISBN = "979-8-89176-332-6",
abstract = "Integrating large language models (LLMs) as action proposers in reinforcement learning (RL) significantly boosts performance in text-based environments but incurs prohibitive computational costs. We introduce a cache-efficient framework for Bayesian RL that leverages LLM-derived action suggestions, drastically reducing these costs while maintaining near-optimal performance. Our approach features an adaptive caching mechanism, optimized via meta-learning based on policy performance, to enable efficient inference across text-based games (e.g., TextWorld, ALFWorld) and robotic control tasks (e.g., MuJoCo, MetaWorld). This framework achieves a $3.8\times${--}$4.7\times$ reduction in LLM queries and $4.0\times${--}$12.0\times$ lower median latencies (85{--}93ms on consumer hardware), while retaining 96{--}98{\%} of the uncached policy{'}s performance. We provide theoretical guarantees on the reliability of cached decisions with Kullback-Leibler (KL) divergence bounds, which are validated empirically by high success rates (90.4{--}95.6{\%}) in complex text environments. For offline RL, our proposed CQL-Prior variant improves performance by 14{--}29{\%} and reduces training time by 38{--}40{\%}. Evaluations across eight diverse tasks demonstrate the framework{'}s generalizability and practicality for resource-constrained settings, making LLM-guided RL a viable and accessible approach for both text-based and robotic applications."
}