% Long paper from the ACL 2025 main conference proceedings (Volume 1).
% The url field uses the canonical ACL Anthology landing page for paper
% 2025.acl-long.1526 rather than the temporary preview/ingestion staging host.
% The address field records the conference venue, as in ACL Anthology exports.
@inproceedings{kruengkrai-yoshino-2025-teaching,
  title     = {Teaching Text Agents to Learn Sequential Decision Making from Failure},
  author    = {Kruengkrai, Canasai and
               Yoshino, Koichiro},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.acl-long.1526/},
  pages     = {31619--31635},
  isbn      = {979-8-89176-251-0},
  abstract  = {Text-based reinforcement-learning agents improve their policies by interacting with their environments to collect more training data. However, these self-collected data inevitably contain intermediate failed actions caused by attempting physically infeasible behaviors and/or hallucinations. Directly learning a policy from such trajectories can reinforce incorrect behaviors and reduce task success rates. In this paper, we propose a failed action-aware objective that suppresses the negative impact of failed actions during training by assigning zero return based on textual feedback. Building on this objective, we introduce a perturbation method that leverages unsuccessful trajectories to construct new successful ones that share the same goal. This allows agents to benefit from diverse experiences without further interaction with the environment. Experiments in ALFWorld and ScienceWorld demonstrate that our method significantly outperforms strong baselines and generalizes across environments. Code is available at https://github.com/riken-grp/text-agent.},
}
Markdown (Informal)
[Teaching Text Agents to Learn Sequential Decision Making from Failure](https://aclanthology.org/2025.acl-long.1526/) (Kruengkrai & Yoshino, ACL 2025)
ACL