% Huang (2026): short paper in the ACL 2026 proceedings, Volume 2 (Anthology ID 2026.acl-short.44).
% NOTE(review): the url field points at the Anthology preview/ingest host -- confirm and
%   switch to the canonical Anthology URL (or add a doi) once the volume is published.
% NOTE(review): address appears to hold the conference venue, as in Anthology exports,
%   rather than the publisher's city -- confirm against the target style's expectations.
% The abstract text is kept verbatim from the export, including its *...* emphasis markers.
@inproceedings{huang-2026-neuro,
  title     = {Neuro-Symbolic Agentic Reinforcement Learning for Long-Term Original Character Companionship and Interaction},
  author    = {Huang, Zhenhan},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 2: Short Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-short.44/},
  pages     = {530--539},
  isbn      = {979-8-89176-391-3},
  abstract  = {As human-agent interaction (HAI) evolves toward long-term social companionship, users expect *Original Character (OC)* agents to maintain a consistent persona, manage shared memories, and adapt to ever-changing preferences. However, LLM-based agents optimized by prompting or SFT exhibit a generalization gap: they behave as myopic instruction followers, leading to cascading errors in multi-turn interactions. For the agents to learn trajectory-level value functions that enable farsighted decision-making, we propose the NSARL framework, which formalizes OC companion agents' interactions as a POMDP and decomposes the agent into three sub-policies (Router, Memory, and Persona), optimized via closed-loop RL from AI feedback (RLAIF) with verifiable rewards in a graph-constrained action space. Our preliminary experiments indicate a trade-off: SFT yields stronger persona generation, while NSARL improves structural logic, through conservative strategies (e.g., over-routing) that increase workflow completeness, advocating for a hybrid deployment strategy.},
}
Markdown (Informal)
[Neuro-Symbolic Agentic Reinforcement Learning for Long-Term Original Character Companionship and Interaction](https://preview.aclanthology.org/ingest-acl/2026.acl-short.44/) (Huang, ACL 2026)
ACL