Li, Lyu & Wang — FTTT / OpTune paper, ACL 2025 Long Papers. URL normalised
from the temporary preview/ingestion host to the canonical Anthology URL.
@inproceedings{li-etal-2025-learning-reason,
    title     = {Learning to Reason from Feedback at Test-Time},
    author    = {Li, Yanyang and
      Lyu, Michael R. and
      Wang, Liwei},
    editor    = {Che, Wanxiang and
      Nabende, Joyce and
      Shutova, Ekaterina and
      Pilehvar, Mohammad Taher},
    booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.acl-long.262/},
    pages     = {5241--5253},
    isbn      = {979-8-89176-251-0},
    abstract  = {Solving complex tasks in a single attempt is challenging for large language models (LLMs). Iterative interaction with the environment and feedback is often required to achieve success, making effective feedback utilization a critical topic. Existing approaches either struggle with length generalization or rely on naive retries without leveraging prior information. In this paper, we introduce FTTT, a novel paradigm that formulates feedback utilization as an optimization problem at test time. Additionally, we propose a learnable test-time optimizer, OpTune, to effectively exploit feedback. Experiments on two LLMs across four reasoning datasets demonstrate that FTTT and OpTune achieve superior scalability and performance.}
}
Markdown (Informal)
[Learning to Reason from Feedback at Test-Time](https://aclanthology.org/2025.acl-long.262/) (Li et al., ACL 2025)
ACL
- Yanyang Li, Michael R. Lyu, and Liwei Wang. 2025. Learning to Reason from Feedback at Test-Time. In Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 5241–5253, Vienna, Austria. Association for Computational Linguistics.