% EMNLP 2025 paper, ACL Anthology ID 2025.emnlp-main.212.
% The url field uses the canonical Anthology permalink rather than a preview-branch host.
% NOTE(review): address appears to hold the conference venue (ACL Anthology export
% convention), not the publisher's city -- confirm before mapping to location/venue.
@inproceedings{wu-etal-2025-backtrackagent,
    title     = {{BacktrackAgent}: Enhancing {GUI} Agent with Error Detection and Backtracking Mechanism},
    author    = {Wu, Qinzhuo and
                 Gao, Pengzhi and
                 Liu, Wei and
                 Luan, Jian},
    editor    = {Christodoulopoulos, Christos and
                 Chakraborty, Tanmoy and
                 Rose, Carolyn and
                 Peng, Violet},
    booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
    month     = nov,
    year      = {2025},
    address   = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.emnlp-main.212/},
    doi       = {10.18653/v1/2025.emnlp-main.212},
    pages     = {4250--4272},
    isbn      = {979-8-89176-332-6},
    abstract  = {Graphical User Interface (GUI) agents have gained substantial attention due to their impressive capabilities to complete tasks through multiple interactions within GUI environments. However, existing agents primarily focus on enhancing the accuracy of individual actions and often lack effective mechanisms for detecting and recovering from errors. To address these shortcomings, we propose the BacktrackAgent, a robust framework that incorporates a backtracking mechanism to improve task completion efficiency. BacktrackAgent includes verifier, judger, and reflector components as modules for error detection and recovery, while also applying judgment rewards to further enhance the agent's performance. Additionally, we develop a training dataset specifically designed for the backtracking mechanism, which considers the outcome pages after action executions. Experimental results show that BacktrackAgent has achieved performance improvements in both task success rate and step accuracy on Mobile3M and Auto-UI benchmarks. Our data and code will be released upon acceptance.},
}
Markdown (Informal)
[BacktrackAgent: Enhancing GUI Agent with Error Detection and Backtracking Mechanism](https://aclanthology.org/2025.emnlp-main.212/) (Wu et al., EMNLP 2025)
ACL