% CAFES: workshop paper in the ALVR proceedings (4th workshop, 2026).
% NOTE(review): the url field points at the preview.aclanthology.org
% "ingest-acl-workshops" host -- confirm the final canonical URL before release.
@inproceedings{su-etal-2026-cafes,
  author    = {Su, Jiamin and Yan, Yibo and Gao, Zhuoran and Zhang, Han and Liu, Xiang and Zhou, Huiyu and Hu, Xuming},
  title     = {{CAFES}: A Collaborative Multi-Agent Framework for Multi-Granular Multimodal Essay Scoring},
  editor    = {Yan, Qianqi and Montariol, Syrielle and Fan, Yue and Gu, Jing and Pan, Jiayi and Li, Manling and Kordjamshidi, Parisa and Suhr, Alane and Wang, Xin Eric},
  booktitle = {Proceedings of the 4th Workshop on Advances in Language and Vision Research ({ALVR})},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {115--138},
  isbn      = {979-8-89176-398-2},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.alvr-main.10/},
}