% ALVR 2024 workshop paper (ACL Anthology ID 2024.alvr-1.13).
% The url field is the canonical Anthology landing page; the doi is stored bare (no resolver prefix).
@inproceedings{reinhardt-etal-2024-improving,
  title     = {Improving Vision-Language Cross-Lingual Transfer with Scheduled Unfreezing},
  author    = {Reinhardt, Max and
               Geigle, Gregor and
               Timofte, Radu and
               Glava{\v{s}}, Goran},
  editor    = {Gu, Jing and
               Fu, Tsu-Jui (Ray) and
               Hudson, Drew and
               Celikyilmaz, Asli and
               Wang, William},
  booktitle = {Proceedings of the 3rd Workshop on Advances in Language and Vision Research ({ALVR})},
  month     = aug,
  year      = 2024,
  address   = {Bangkok, Thailand},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.alvr-1.13/},
  doi       = {10.18653/v1/2024.alvr-1.13},
  pages     = {155--166},
  abstract  = {Large-scale pretraining of vision-language (VL) models brought dramatic improvements across numerous tasks, from visual question-answering to cross-modal retrieval but these gains are mostly limited to English. Massively multilingual VL encoder models (mVLMs) hold promise for other languages: after fine-tuning on only English task data, they can perform the task in other languages in what is termed zero-shot cross-lingual transfer (ZS-XLT). Still, ZS-XLT sees a large performance gap to English, especially for low-resource languages. In this work, we reduce this gap with a fine-tuning strategy known as \textit{Scheduled Unfreezing} (SUF): instead of updating all parameters from the start, we begin with the top layer(s) of the vision-language encoder and gradually unfreeze (i.e., update) its layers top to bottom. SUF forces reliance on encoder{'}s representations from higher layers: the fact that in multilingual models these representations encode higher-level semantics rather than low-level language-specific idiosyncrasies, we hypothesize, should render SUF beneficial for ZS-XLT. Experiments with two mVLMs (UC2 {\&} CCLM) on three downstream tasks (xGQA, XVNLI, xFlickrCo) show that SUF brings consistent gains in ZS-XLT, especially for visual Q{\&}A (xGQA) by up to 10 points.},
}
Markdown (Informal)
[Improving Vision-Language Cross-Lingual Transfer with Scheduled Unfreezing](https://aclanthology.org/2024.alvr-1.13/) (Reinhardt et al., ALVR 2024)
ACL