% EMNLP 2022 conference paper; ACL Anthology ID 2022.emnlp-main.626 (matches the DOI suffix).
% The url field uses the canonical aclanthology.org address, not a temporary preview-branch link.
@inproceedings{wang-etal-2022-understanding-multimodal,
  title     = {Understanding {ME}? Multimodal Evaluation for Fine-grained Visual Commonsense},
  author    = {Wang, Zhecan and You, Haoxuan and He, Yicheng and Li, Wenhao and Chang, Kai-Wei and Chang, Shih-Fu},
  editor    = {Goldberg, Yoav and Kozareva, Zornitsa and Zhang, Yue},
  booktitle = {Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing},
  month     = dec,
  year      = {2022},
  address   = {Abu Dhabi, United Arab Emirates},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.emnlp-main.626/},
  doi       = {10.18653/v1/2022.emnlp-main.626},
  pages     = {9212--9224},
}