@comment{
  ALVR 2026 workshop paper (ACL Anthology export).
  NOTE(review): the url field points at the preview.aclanthology.org ingest
  branch, which looks like a staging address; confirm the canonical
  Anthology URL once the volume is published.
}
@inproceedings{preeti-etal-2026-semantically,
    title = "Semantically Aware Optimal Transport for Dense Label Transfer",
    author = "Preeti  and
      Ravish, Kiran  and
      Kushwaha, Ankita  and
      Kumar, Pawan",
    editor = "Yan, Qianqi  and
      Montariol, Syrielle  and
      Fan, Yue  and
      Gu, Jing  and
      Pan, Jiayi  and
      Li, Manling  and
      Kordjamshidi, Parisa  and
      Suhr, Alane  and
      Wang, Xin Eric",
    booktitle = "Proceedings of the 4th Workshop on Advances in Language and Vision Research ({ALVR})",
    month = jul,
    year = "2026",
    address = "San Diego, California, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl-workshops/2026.alvr-main.3/",
    pages = "18--45",
    isbn = "979-8-89176-398-2",
    abstract = "Vision foundation models produce features that generalize across visual domains without fine-tuning, yet naively transferring labels through these feature spaces fails under large distribution shifts. We propose SAOT (Semantically Aware Optimal Transport), which learns a transport cost within a fused unbalanced optimal transport formulation for dense label transfer from frozen vision transformer features to new domains. SAOT combines a learnable appearance metric with semantic class-prototype priors, unbalanced transport for partial matching under distribution shift, and a block-sparse solver for tractable inference. We pair this with a two-stage decoder: an MLP trained on SAOT pseudo-labels, then refined via EMA-teacher self-training with class-balanced sampling. On GTA5$\to$Cityscapes with frozen DINOv2 ViT-L/14 features, SAOT+Decoder reaches 25.7{\%} mIoU, a 3.8$\times$ improvement over nearest-neighbor transfer (6.7{\%}), without any backbone adaptation. Per-class results show large gains on spatially coherent classes (road 90.3{\%}, car 76.2{\%}, building 71.5{\%}), demonstrating that learned semantic transport costs capture domain-invariant structure even under severe synthetic-to-real shifts. On VOC train$\to$val with frozen ViT-B/16 features, the full pipeline reaches 47.5{\%} mIoU, indicating that the approach extends beyond synthetic-to-real adaptation."
}
Markdown (Informal)
[Semantically Aware Optimal Transport for Dense Label Transfer](https://preview.aclanthology.org/ingest-acl-workshops/2026.alvr-main.3/) (Preeti et al., ALVR 2026)
ACL