% ChatVLA -- EMNLP 2025 main-conference paper (ACL Anthology ID 2025.emnlp-main.273).
% The url field uses the canonical Anthology permalink rather than the temporary
% ingest-preview build; NOTE(review): confirm the permalink resolves once ingestion is merged.
@inproceedings{zhou-etal-2025-chatvla,
  author    = {Zhou, Zhongyi and Zhu, Yichen and Zhu, Minjie and Wen, Junjie and Liu, Ning and Xu, Zhiyuan and Meng, Weibin and Peng, Yaxin and Shen, Chaomin and Feng, Feifei and Xu, Yi},
  title     = {{ChatVLA}: Unified Multimodal Understanding and Robot Control with Vision-Language-Action Model},
  editor    = {Christodoulopoulos, Christos and Chakraborty, Tanmoy and Rose, Carolyn and Peng, Violet},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  pages     = {5377--5395},
  isbn      = {979-8-89176-332-6},
  url       = {https://aclanthology.org/2025.emnlp-main.273/},
}