% Zeng et al., NAACL 2024 (Volume 1: Long Papers), pp. 7937--7964.
% The url field is the canonical ACL Anthology landing page for paper ID
% 2024.naacl-long.440 (same ID as in the DOI); the doi field is stored bare,
% without a resolver prefix.
@inproceedings{zeng-etal-2024-matters,
    title     = {What Matters in Training a {GPT4}-Style Language Model with Multimodal Inputs?},
    author    = {Zeng, Yan and
                 Zhang, Hanbo and
                 Zheng, Jiani and
                 Xia, Jiangnan and
                 Wei, Guoqiang and
                 Wei, Yang and
                 Zhang, Yuchen and
                 Kong, Tao and
                 Song, Ruihua},
    editor    = {Duh, Kevin and
                 Gomez, Helena and
                 Bethard, Steven},
    booktitle = {Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)},
    month     = jun,
    year      = {2024},
    address   = {Mexico City, Mexico},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2024.naacl-long.440/},
    doi       = {10.18653/v1/2024.naacl-long.440},
    pages     = {7937--7964},
}