% Workshop paper in the AmericasNLP 2026 proceedings (ACL), pages 248--256.
% NOTE(review): url uses the canonical aclanthology.org host rather than the
% temporary preview/ingest host, and the editor names "Coto-Solano, Rolando"
% and "von der Wense, Katharina" were normalised from the ingest export.
% Confirm both against the published Anthology record.
@inproceedings{bueno-garg-2026-culturally,
  author    = {Bueno, Mirelle and Garg, Sushil},
  title     = {Culturally Grounded Image Captioning in {Indigenous} Languages with Vision-Language Models: Cascaded and Single-Stage Approaches},
  editor    = {Mager, Manuel
               and Ebrahimi, Abteen
               and Bui, Minh Duc
               and Pugh, Robert
               and Oncevay, Arturo
               and Chiruzzo, Luis
               and Coto-Solano, Rolando
               and Rijhwani, Shruti
               and von der Wense, Katharina},
  booktitle = {Proceedings of the Sixth Workshop on {NLP} for Indigenous Languages of the {Americas} ({AmericasNLP})},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {248--256},
  url       = {https://aclanthology.org/2026.americasnlp-6.23/},
  isbn      = {979-8-89176-415-6},
}