% Conference-paper entry (Findings of ACL 2026): empirical study of
% reasoning-enabled vision-language models in mobile GUI agents.
% Text outside an entry is ignored by BibTeX, so these notes are inert.
%
% NOTE(review): `url` points at a preview.aclanthology.org ingest path, which
%   looks like a staging link -- confirm the canonical URL before release.
% NOTE(review): `address` is kept as exported; presumably it is the conference
%   location rather than the publisher's city -- verify against the target style.
% `{An}` is braced because sentence-casing styles would otherwise lowercase
%   the word that follows the question mark.
@inproceedings{zhang-etal-2026-chain-thought,
  title     = {Does Chain-of-Thought Reasoning Help Mobile {GUI} Agents? {An} Empirical Study},
  author    = {Zhang, Li and
               Gao, Longxi and
               Xu, Mengwei},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.392/},
  pages     = {7981--7996},
  isbn      = {979-8-89176-395-1},
  abstract  = {Reasoning capabilities have significantly improved the performance of vision-language models (VLMs) in domains such as mathematical problem-solving, coding, and visual question-answering. However, their impact on real-world applications remains unclear. This paper presents a large-scale empirical study on the effectiveness of reasoning-enabled VLMs in mobile GUI agents. We evaluate six pairs of VLMs, including both commercial and open-source lightweight models, by comparing their base and reasoning-enhanced versions across static and interactive benchmarks. Our findings show that reasoning-enabled VLMs generally provide only marginal improvements over their non-reasoning counterparts and can even degrade performance in certain agent configurations. Notably, reasoning and non-reasoning VLMs fail on different sets of tasks, suggesting that reasoning does have an impact, but its benefits and drawbacks counterbalance each other. We attribute these inconsistencies to the limitations of benchmarks and VLMs. Based on the findings, we provide insights for further enhancing mobile GUI agents in terms of benchmarks, VLMs, and their adaptability in dynamically invoking reasoning VLMs.},
}

Markdown (Informal)
[Does Chain-of-Thought Reasoning Help Mobile GUI Agents? An Empirical Study](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.392/) (Zhang et al., Findings 2026)
ACL