% Workshop paper in the CustomNLP4U 2026 proceedings (Association for Computational Linguistics).
% NOTE(review): the url field points at a preview.aclanthology.org ingest build; confirm and
% switch to the canonical Anthology URL (and add a DOI, if one is assigned) once published.
@inproceedings{yang-2026-valid,
  title     = {When Valid Signals Fail: Regime Boundaries Between {LLM} Features and {RL} Trading Policies},
  author    = {Yang, Zhengzhe},
  editor    = {Mysore, Sheshera and
               Kumar, Sachin and
               Balachandran, Vidhisha and
               Hayati, Shirley Anugrah and
               Brahman, Faeze and
               Moussa, Hanane Nour and
               Salemi, Alireza},
  booktitle = {Proceedings of the Second Workshop on Customizable {NLP}: Progress and Challenges in Customizing {NLP} for a Domain, Application, Group, or Individual ({CustomNLP4U})},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.customnlp4u-1.17/},
  pages     = {182--190},
  isbn      = {979-8-89176-396-8},
  abstract  = {Can large language models (LLMs) generate continuous numerical features that improve reinforcement learning (RL) trading agents? We build a modular pipeline where a frozen LLM serves as a stateless feature extractor, transforming unstructured daily news and filings into a fixed-dimensional vector consumed by a downstream PPO agent. We introduce an automated prompt-optimization loop that treats the extraction prompt as a discrete hyperparameter and tunes it directly against the Information Coefficient{---}the Spearman rank correlation between predicted and realized returns{---}rather than NLP losses. The optimized prompt discovers genuinely predictive features (IC above $\sim$0.15 on held-out data). However, these valid intermediate representations do not automatically translate into downstream task performance: during a distribution shift caused by a macroeconomic shock, LLM-derived features add noise, and the augmented agent under-performs a price-only baseline. In a calmer test regime the agent recovers, yet macroeconomic state variables remain the most robust driver of policy improvement. Our findings highlight a gap between feature-level validity and policy-level robustness that parallels known challenges in transfer learning under distribution shift.},
}
Markdown (Informal)
[When Valid Signals Fail: Regime Boundaries Between LLM Features and RL Trading Policies](https://preview.aclanthology.org/ingest-acl-workshops/2026.customnlp4u-1.17/) (Yang, CustomNLP4U 2026)
ACL