% Volume-level record for the workshop proceedings (Anthology ID 2025.evalmg-1.0).
% The url field uses the permanent Anthology address, not a branch-preview host.
@proceedings{evalmg-ws-2025-1,
    title     = {Proceedings of the First Workshop of Evaluation of Multi-Modal Generation},
    editor    = {Zhang, Wei Emma and
                 Dai, Xiang and
                 Elliot, Desmond and
                 Fang, Byron and
                 Sim, Mongyuan and
                 Zhuang, Haojie and
                 Chen, Weitong},
    month     = jan,
    year      = {2025},
    address   = {Abu Dhabi, UAE},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.evalmg-1.0/},
}
% Workshop paper, Anthology ID 2025.evalmg-1.1, pages 1--9.
% The url field uses the permanent Anthology address, not a branch-preview host.
@inproceedings{raja-etal-2025-dataset,
    title     = {A Dataset for Programming-based Instructional Video Classification and Question Answering},
    author    = {Raja, Sana Javaid and
                 Zafar, Adeel and
                 Shoaib, Aqsa},
    editor    = {Zhang, Wei Emma and
                 Dai, Xiang and
                 Elliot, Desmond and
                 Fang, Byron and
                 Sim, Mongyuan and
                 Zhuang, Haojie and
                 Chen, Weitong},
    booktitle = {Proceedings of the First Workshop of Evaluation of Multi-Modal Generation},
    month     = jan,
    year      = {2025},
    address   = {Abu Dhabi, UAE},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.evalmg-1.1/},
    pages     = {1--9},
}
% Workshop paper, Anthology ID 2025.evalmg-1.2, pages 10--23.
% Model names are braced as whole tokens so sentence-casing styles keep their capitals.
% The url field uses the permanent Anthology address, not a branch-preview host.
@inproceedings{pirhadi-etal-2025-cvt5,
    title     = {{CVT5}: Using Compressed Video Encoder and {UMT5} for Dense Video Captioning},
    author    = {Pirhadi, Mohammad Javad and
                 Mirzaei, Motahhare and
                 Eetemadi, Sauleh},
    editor    = {Zhang, Wei Emma and
                 Dai, Xiang and
                 Elliot, Desmond and
                 Fang, Byron and
                 Sim, Mongyuan and
                 Zhuang, Haojie and
                 Chen, Weitong},
    booktitle = {Proceedings of the First Workshop of Evaluation of Multi-Modal Generation},
    month     = jan,
    year      = {2025},
    address   = {Abu Dhabi, UAE},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.evalmg-1.2/},
    pages     = {10--23},
}
% Workshop paper, Anthology ID 2025.evalmg-1.3, pages 24--39.
% The pronoun "I" stays braced so sentence-casing styles do not lowercase it.
% The url field uses the permanent Anthology address, not a branch-preview host.
@inproceedings{bai-pezzelle-2025-feel,
    title     = {If {I} feel smart, {I} will do the right thing: Combining Complementary Multimodal Information in Visual Language Models},
    author    = {Bai, Yuyu and
                 Pezzelle, Sandro},
    editor    = {Zhang, Wei Emma and
                 Dai, Xiang and
                 Elliot, Desmond and
                 Fang, Byron and
                 Sim, Mongyuan and
                 Zhuang, Haojie and
                 Chen, Weitong},
    booktitle = {Proceedings of the First Workshop of Evaluation of Multi-Modal Generation},
    month     = jan,
    year      = {2025},
    address   = {Abu Dhabi, UAE},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.evalmg-1.3/},
    pages     = {24--39},
}
% Workshop paper, Anthology ID 2025.evalmg-1.4, pages 40--51.
% The mixed-case model name is braced as one token so styles cannot recase it.
% The url field uses the permanent Anthology address, not a branch-preview host.
@inproceedings{sun-etal-2025-llava,
    title     = {{LLaVA-RE}: Binary Image-Text Relevancy Evaluation with Multimodal Large Language Model},
    author    = {Sun, Tao and
                 Liu, Oliver and
                 Li, JinJin and
                 Ma, Lan},
    editor    = {Zhang, Wei Emma and
                 Dai, Xiang and
                 Elliot, Desmond and
                 Fang, Byron and
                 Sim, Mongyuan and
                 Zhuang, Haojie and
                 Chen, Weitong},
    booktitle = {Proceedings of the First Workshop of Evaluation of Multi-Modal Generation},
    month     = jan,
    year      = {2025},
    address   = {Abu Dhabi, UAE},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.evalmg-1.4/},
    pages     = {40--51},
}
% Workshop paper, Anthology ID 2025.evalmg-1.5, pages 52--56.
% Proper nouns and acronyms are braced as whole words rather than single letters.
% The url field uses the permanent Anthology address, not a branch-preview host.
@inproceedings{farsi-etal-2025-persian,
    title     = {{Persian} in a Court: Benchmarking {VLMs} In {Persian} Multi-Modal Tasks},
    author    = {Farsi, Farhan and
                 Shariati Motlagh, Shahriar and
                 Bali, Shayan and
                 Sabouri, Sadra and
                 Momtazi, Saeedeh},
    editor    = {Zhang, Wei Emma and
                 Dai, Xiang and
                 Elliot, Desmond and
                 Fang, Byron and
                 Sim, Mongyuan and
                 Zhuang, Haojie and
                 Chen, Weitong},
    booktitle = {Proceedings of the First Workshop of Evaluation of Multi-Modal Generation},
    month     = jan,
    year      = {2025},
    address   = {Abu Dhabi, UAE},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.evalmg-1.5/},
    pages     = {52--56},
}
% Workshop paper, Anthology ID 2025.evalmg-1.6, pages 57--75.
% Proper nouns and the benchmark name are braced as whole words.
% NOTE(review): the fifth author was recorded as "Chien-Hua, Chen"; the parts
% look swapped (family name Chen, given name Chien-Hua), so they are reordered
% here. Confirm against the paper's title page.
% The url field uses the permanent Anthology address, not a branch-preview host.
@inproceedings{hsieh-etal-2025-taiwanvqa,
    title     = {{TaiwanVQA}: A Benchmark for Visual Question Answering for {Taiwanese} Daily Life},
    author    = {Hsieh, Hsin-Yi and
                 Liu, Shang Wei and
                 Meng, Chang Chih and
                 Lin, Shuo-Yueh and
                 Chen, Chien-Hua and
                 Lin, Hung-Ju and
                 Huang, Hen-Hsen and
                 Wu, I-Chen},
    editor    = {Zhang, Wei Emma and
                 Dai, Xiang and
                 Elliot, Desmond and
                 Fang, Byron and
                 Sim, Mongyuan and
                 Zhuang, Haojie and
                 Chen, Weitong},
    booktitle = {Proceedings of the First Workshop of Evaluation of Multi-Modal Generation},
    month     = jan,
    year      = {2025},
    address   = {Abu Dhabi, UAE},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.evalmg-1.6/},
    pages     = {57--75},
}
% Workshop paper, Anthology ID 2025.evalmg-1.7, pages 76--94.
% The url field uses the permanent Anthology address, not a branch-preview host.
@inproceedings{sinha-etal-2025-guiding,
    title     = {Guiding Vision-Language Model Selection for Visual Question-Answering Across Tasks, Domains, and Knowledge Types},
    author    = {Sinha, Neelabh and
                 Jain, Vinija and
                 Chadha, Aman},
    editor    = {Zhang, Wei Emma and
                 Dai, Xiang and
                 Elliot, Desmond and
                 Fang, Byron and
                 Sim, Mongyuan and
                 Zhuang, Haojie and
                 Chen, Weitong},
    booktitle = {Proceedings of the First Workshop of Evaluation of Multi-Modal Generation},
    month     = jan,
    year      = {2025},
    address   = {Abu Dhabi, UAE},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.evalmg-1.7/},
    pages     = {76--94},
}