% Ford and Rios, Findings of EMNLP 2025 (ACL Anthology record 2025.findings-emnlp.1371).
@inproceedings{ford-rios-2025-run,
  title     = {Does It Run and Is That Enough? {Revisiting} Text-to-Chart Generation with a Multi-Agent Approach},
  author    = {Ford, James and Rios, Anthony},
  editor    = {Christodoulopoulos, Christos and Chakraborty, Tanmoy and Rose, Carolyn and Peng, Violet},
  booktitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2025},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-emnlp.1371/},
  doi       = {10.18653/v1/2025.findings-emnlp.1371},
  pages     = {25160--25173},
  isbn      = {979-8-89176-335-7},
  abstract  = {Large language models can translate natural-language chart descriptions into runnable code, yet approximately 15{\%} of the generated scripts still fail to execute, even after supervised fine-tuning and reinforcement learning. We investigate whether this persistent error rate stems from model limitations or from reliance on a single-prompt design. To explore this, we propose a lightweight multi-agent pipeline that separates drafting, execution, repair, and judgment, using only an off-the-shelf GPT-4o-mini model. On the Text2Chart31 benchmark, our system reduces execution errors to 4.5{\%} within three repair iterations, outperforming the strongest fine-tuned baseline by nearly 5 percentage points while requiring significantly less compute. Similar performance is observed on the ChartX benchmark, with an error rate of 4.6{\%}, demonstrating strong generalization. Under current benchmarks, execution success appears largely solved. However, manual review reveals that 6 out of 100 sampled charts contain hallucinations, and an LLM-based accessibility audit shows that only 33.3{\%} (Text2Chart31) and 7.2{\%} (ChartX) of generated charts satisfy basic colorblindness guidelines. These findings suggest that future work should shift focus from execution reliability toward improving chart aesthetics, semantic fidelity, and accessibility.},
}
Markdown (Informal)
[Does It Run and Is That Enough? Revisiting Text-to-Chart Generation with a Multi-Agent Approach](https://aclanthology.org/2025.findings-emnlp.1371/) (Ford & Rios, Findings 2025)
ACL