% Workshop paper (pp. 199--208) in the ALVR 2026 proceedings.
% NOTE(review): the url host is preview.aclanthology.org with an ingest path;
% presumably a staging link -- confirm the canonical URL before relying on it.
@inproceedings{gupta-etal-2026-beyond,
  title     = {Beyond Visual Similarity: Rule-Guided Multimodal Clustering with explicit domain rules},
  author    = {Gupta, Kishor Datta and Haque, Mohd Ariful and Kamal, Marufa and Hasan, Ahmed Rafi and Rahman, Md. Mahfuzur and George, Roy},
  editor    = {Yan, Qianqi and Montariol, Syrielle and Fan, Yue and Gu, Jing and Pan, Jiayi and Li, Manling and Kordjamshidi, Parisa and Suhr, Alane and Wang, Xin Eric},
  booktitle = {Proceedings of the 4th Workshop on Advances in Language and Vision Research ({ALVR})},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.alvr-main.16/},
  pages     = {199--208},
  isbn      = {979-8-89176-398-2},
}