% Liu and Ye (2019), ACL 2019 Student Research Workshop paper, pp. 169--176.
% The url field uses the canonical ACL Anthology address for paper P19-2023
% (same identifier as the DOI suffix) instead of a branch preview deployment.
@inproceedings{liu-ye-2019-strong,
  title     = {A Strong and Robust Baseline for Text-Image Matching},
  author    = {Liu, Fangyu and
               Ye, Rongtian},
  editor    = {Alva-Manchego, Fernando and
               Choi, Eunsol and
               Khashabi, Daniel},
  booktitle = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop},
  month     = jul,
  year      = {2019},
  address   = {Florence, Italy},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/P19-2023/},
  doi       = {10.18653/v1/P19-2023},
  pages     = {169--176},
  abstract  = {We review the current schemes of text-image matching models and propose improvements for both training and inference. First, we empirically show limitations of two popular loss (sum and max-margin loss) widely used in training text-image embeddings and propose a trade-off: a kNN-margin loss which 1) utilizes information from hard negatives and 2) is robust to noise as all K-most hardest samples are taken into account, tolerating pseudo negatives and outliers. Second, we advocate the use of Inverted Softmax (IS) and Cross-modal Local Scaling (CSLS) during inference to mitigate the so-called hubness problem in high-dimensional embedding space, enhancing scores of all metrics by a large margin.},
}
Markdown (Informal)
[A Strong and Robust Baseline for Text-Image Matching](https://aclanthology.org/P19-2023/) (Liu & Ye, ACL 2019)
ACL
- Fangyu Liu and Rongtian Ye. 2019. A Strong and Robust Baseline for Text-Image Matching. In Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop, pages 169–176, Florence, Italy. Association for Computational Linguistics.