@inproceedings{yu-etal-2022-coco,
title = "{COCO}-{DR}: Combating the Distribution Shift in Zero-Shot Dense Retrieval with Contrastive and Distributionally Robust Learning",
author = "Yu, Yue and
Xiong, Chenyan and
Sun, Si and
Zhang, Chao and
Overwijk, Arnold",
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2022.emnlp-main.95/",
doi = "10.18653/v1/2022.emnlp-main.95",
pages = "1462--1479",
    abstract = "We present a new zero-shot dense retrieval (ZeroDR) method, COCO-DR, to improve the generalization ability of dense retrieval by combating the distribution shifts between source training tasks and target scenarios. To mitigate the impact of document differences, COCO-DR continues pretraining the language model on the target corpora to adapt the model to target distributions via COntinuous COntrastive learning. To prepare for unseen target queries, COCO-DR leverages implicit Distributionally Robust Optimization (iDRO) to reweight samples from different source query clusters for improving model robustness over rare queries during fine-tuning. COCO-DR achieves superior average performance on BEIR, the zero-shot retrieval benchmark. At BERT{\_}Base scale, COCO-DR Base outperforms other ZeroDR models with 60x larger size. At BERT{\_}Large scale, COCO-DR Large outperforms the giant GPT-3 embedding model which has 500x more parameters. Our analysis shows the correlation between COCO-DR's effectiveness in combating distribution shifts and improving zero-shot accuracy. Our code and model can be found at \url{https://github.com/OpenMatch/COCO-DR}."
}
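
To illustrate the robust-reweighting idea mentioned in the abstract, here is a minimal, hedged sketch of distributionally robust weighting over query clusters. It is not the paper's iDRO formulation (which derives cluster weights implicitly during fine-tuning); it is a simplified group-DRO-style weighting, and the function name, `temperature` parameter, and toy losses are all illustrative assumptions.

```python
# Simplified group-DRO-style sketch (illustrative only, not COCO-DR's exact iDRO):
# upweight source query clusters with higher loss so rare query types are not
# ignored when aggregating the fine-tuning objective.
import torch

def dro_weighted_loss(cluster_losses: torch.Tensor, temperature: float = 1.0) -> torch.Tensor:
    """cluster_losses: mean retrieval loss per query cluster, shape [num_clusters]."""
    # Softmax over (detached) per-cluster losses: harder / rarer clusters get
    # larger weights; detaching keeps the weights out of the gradient path.
    weights = torch.softmax(cluster_losses.detach() / temperature, dim=0)
    return (weights * cluster_losses).sum()

# Toy usage with three hypothetical query clusters of unequal difficulty.
losses = torch.tensor([0.4, 1.2, 0.7], requires_grad=True)
robust_loss = dro_weighted_loss(losses)
robust_loss.backward()  # gradient is largest for the hardest cluster
```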
Markdown (Informal)
[COCO-DR: Combating the Distribution Shift in Zero-Shot Dense Retrieval with Contrastive and Distributionally Robust Learning](https://aclanthology.org/2022.emnlp-main.95/) (Yu et al., EMNLP 2022)