% FormGym (Toles, Song, Singh, Yu), EACL 2026 long paper, pages 3771--3785.
% Text outside an entry is ignored by BibTeX, so these lines act as comments.
% NOTE(review): the url field points at a preview.aclanthology.org ingest path,
% presumably a staging link; confirm the canonical Anthology URL before release.
@inproceedings{toles-etal-2026-formgym,
  author    = {Toles, Matthew and
               Song, Isaac and
               Singh, Rattandeep and
               Yu, Zhou},
  title     = {{FormGym}: Doing Paperwork with Agents},
  editor    = {Demberg, Vera and
               Inui, Kentaro and
               Marquez, Llu{\'\i}s},
  booktitle = {Proceedings of the 19th Conference of the {European} Chapter of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  pages     = {3771--3785},
  isbn      = {979-8-89176-380-7},
  url       = {https://preview.aclanthology.org/ingest-eacl/2026.eacl-long.175/},
  abstract  = {End-to-end form filling refers to automatically populating fields in a document-style form with the appropriate information derived from external data. Although prevalent and useful, no formal benchmark exists for evaluating systems' form completion accuracy. Existing datasets focus on parsing, extraction and web form interaction, rather than end-to-end completion of document-style forms. We propose FormGym, a benchmark formulation of the end-to-end form filling task that evaluates form completion and accuracy. We construct FormGym by repurposing three existing datasets and add one new dataset to achieve more challenging, diverse, and realistic test cases. Our studies show baseline vision language agents (VLAs) perform poorly on FormGym in every scenario, primarily due to poor field localization. GUI agents perform better but suffer from high latency and costs. Therefore we also introduce FieldFinder, a field localization tool that enables zero-shot VLAs to find and accurately place text in input fields. We find that VLAs augmented with FieldFinder achieve better performance compared to baselines in all models.},
}
Markdown (Informal)
[FormGym: Doing Paperwork with Agents](https://preview.aclanthology.org/ingest-eacl/2026.eacl-long.175/) (Toles et al., EACL 2026)
ACL
- Matthew Toles, Isaac Song, Rattandeep Singh, and Zhou Yu. 2026. FormGym: Doing Paperwork with Agents. In Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers), pages 3771–3785, Rabat, Morocco. Association for Computational Linguistics.