% VidCapBench paper, Findings of ACL 2025 (ACL Anthology ID 2025.findings-acl.449).
@inproceedings{chen-etal-2025-vidcapbench,
  author    = {Chen, Xinlong and Zhang, Yuanxing and Rao, Chongling and Guan, Yushuo and Liu, Jiaheng and Zhang, Fuzheng and Song, Chengru and Liu, Qiang and Zhang, Di and Tan, Tieniu},
  title     = {{VidCapBench}: A Comprehensive Benchmark of Video Captioning for Controllable Text-to-Video Generation},
  editor    = {Che, Wanxiang and Nabende, Joyce and Shutova, Ekaterina and Pilehvar, Mohammad Taher},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2025},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  pages     = {8543--8563},
  isbn      = {979-8-89176-256-5},
  url       = {https://aclanthology.org/2025.findings-acl.449/},
}