@inproceedings{ye-etal-2023-query,
  title     = {Query-aware Multi-modal based Ranking Relevance in Video Search},
  author    = {Ye, Chengcan and
               Peng, Ting and
               Chang, Tim and
               Zhou, Zhiyi and
               Wang, Feng},
  editor    = {Wang, Mingxuan and
               Zitouni, Imed},
  booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: Industry Track},
  month     = dec,
  year      = {2023},
  address   = {Singapore},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.emnlp-industry.31/},
  doi       = {10.18653/v1/2023.emnlp-industry.31},
  pages     = {322--330},
  abstract  = {Relevance ranking system plays a crucial role in video search on streaming platforms. Most relevance ranking methods focus on text modality, incapable of fully exploiting cross-modal cues present in video. Recent multi-modal models have demonstrated promise in various vision-language tasks but provide limited help for downstream query-video relevance tasks due to the discrepency between relevance ranking-agnostic pre-training objectives and the real video search scenarios that demand comprehensive relevance modeling. To address these challenges, we propose a QUery-Aware pre-training model with multi-modaLITY (QUALITY) that incorporates hard-mined query information as alignment targets and utilizes video tag information for guidance. QUALITY is integrated into our relevance ranking model, which leverages multi-modal knowledge and improves ranking optimization method based on ordinal regression. Extensive experiments show our proposed model significantly enhances video search performance.},
}
@comment{
Markdown (Informal)
[Query-aware Multi-modal based Ranking Relevance in Video Search](https://aclanthology.org/2023.emnlp-industry.31/) (Ye et al., EMNLP 2023)
ACL
}