% ChatVLA paper in the EMNLP 2025 proceedings (Anthology ID 2025.emnlp-main.273).
% The url field uses the permanent aclanthology.org address rather than the
% temporary ingestion preview host, and the model name is brace-protected as a
% whole word so sentence-casing styles keep its capitalisation.
@inproceedings{zhou-etal-2025-chatvla,
    title     = {{ChatVLA}: Unified Multimodal Understanding and Robot Control with Vision-Language-Action Model},
    author    = {Zhou, Zhongyi and
                 Zhu, Yichen and
                 Zhu, Minjie and
                 Wen, Junjie and
                 Liu, Ning and
                 Xu, Zhiyuan and
                 Meng, Weibin and
                 Peng, Yaxin and
                 Shen, Chaomin and
                 Feng, Feifei and
                 Xu, Yi},
    editor    = {Christodoulopoulos, Christos and
                 Chakraborty, Tanmoy and
                 Rose, Carolyn and
                 Peng, Violet},
    booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
    month     = nov,
    year      = {2025},
    address   = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.emnlp-main.273/},
    pages     = {5377--5395},
    isbn      = {979-8-89176-332-6},
}