% Findings of NAACL 2025 paper; ACL Anthology ID 2025.findings-naacl.167.
@inproceedings{huang-etal-2025-overcoming,
  title     = {Overcoming both Domain Shift and Label Shift for Referring Video Segmentation},
  author    = {Huang, Hai and
               Zhou, Sashuai and
               Xia, Yan},
  editor    = {Chiruzzo, Luis and
               Ritter, Alan and
               Wang, Lu},
  booktitle = {Findings of the Association for Computational Linguistics: {NAACL} 2025},
  month     = apr,
  year      = {2025},
  address   = {Albuquerque, New Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-naacl.167/},
  pages     = {3058--3069},
  isbn      = {979-8-89176-195-7},
  abstract  = {Open-set domain generalization (OSDG) aims to enhance the robustness of the model when facing both domain shift and label shift, highlighting a wide range of potential in real-world applications. However, previous OSDG methods can only recognize seen objects and mark all unseen objects as ``unknown'' categories during inference, which is far from satisfactory. In this paper, we explore the scenario of referring video segmentation to study how to make the model maintain good segmentation ability for unknown objects under OSDG setting. To bridge the huge gap caused by label shift, we propose CLIP-based Reasoning Prompt (CRPrompt), which can combine text and visual prompts together to improve text-object matching ability of CLIP, transferring the segmentation ability to unseen classes based on the knowledge learned from seen classes and large-scale text-image pairs, i.e., color, shape, spatial relationships. Meanwhile, to improve the robustness of CRPrompt, we propose Retrieval-augmented Instance Normalization (RaIN), which can effectively enhance the robustness of the model by retrieving visual objects with similar semantic concepts through input query and performing Instance Norm among them. Extensive experiments on open-set and zero-shot domain generalization tasks demonstrate the effectiveness of our approach.},
}
Markdown (Informal)
[Overcoming both Domain Shift and Label Shift for Referring Video Segmentation](https://aclanthology.org/2025.findings-naacl.167/) (Huang et al., Findings 2025)
ACL