% LlamaV-o1 paper, Findings of ACL 2025 (ACL Anthology ID 2025.findings-acl.1247).
% The url field uses the canonical aclanthology.org address rather than a
% preview/staging mirror; the doi field remains the preferred stable identifier.
@inproceedings{thawakar-etal-2025-llamav,
  author    = {Thawakar, Omkar and
               Dissanayake, Dinura and
               More, Ketan Pravin and
               Thawkar, Ritesh and
               Heakl, Ahmed and
               Ahsan, Noor and
               Li, Yuhao and
               Zumri, Ilmuz Zaman Mohammed and
               Lahoud, Jean and
               Anwer, Rao Muhammad and
               Cholakkal, Hisham and
               Laptev, Ivan and
               Shah, Mubarak and
               Khan, Fahad Shahbaz and
               Khan, Salman},
  title     = {{LlamaV-o1}: Rethinking Step-by-step Visual Reasoning in {LLMs}},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2025},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  pages     = {24290--24315},
  doi       = {10.18653/v1/2025.findings-acl.1247},
  url       = {https://aclanthology.org/2025.findings-acl.1247/},
  isbn      = {979-8-89176-256-5},
}