% VerbCLIP workshop paper, ALVR 2024 proceedings (ACL Anthology ID 2024.alvr-1.17).
% The url field uses the canonical Anthology address; its paper ID matches the DOI suffix.
@inproceedings{wazni-etal-2024-verbclip,
  author    = {Wazni, Hadi and Lo, Kin Ian and Sadrzadeh, Mehrnoosh},
  title     = {{VerbCLIP}: Improving Verb Understanding in Vision-Language Models with Compositional Structures},
  editor    = {Gu, Jing and Fu, Tsu-Jui (Ray) and Hudson, Drew and Celikyilmaz, Asli and Wang, William},
  booktitle = {Proceedings of the 3rd Workshop on Advances in Language and Vision Research (ALVR)},
  month     = aug,
  year      = {2024},
  address   = {Bangkok, Thailand},
  publisher = {Association for Computational Linguistics},
  pages     = {195--201},
  doi       = {10.18653/v1/2024.alvr-1.17},
  url       = {https://aclanthology.org/2024.alvr-1.17/},
}